[clang] b1d4746 - [Hexagon] Change HVX vector predicate types from v512/1024i1 to v64/128i1

Krzysztof Parzyszek via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 19 12:15:29 PST 2020


Author: Krzysztof Parzyszek
Date: 2020-02-19T14:14:56-06:00
New Revision: b1d47467e26142e6029e9ec7ca5c42645ffaa7bb

URL: https://github.com/llvm/llvm-project/commit/b1d47467e26142e6029e9ec7ca5c42645ffaa7bb
DIFF: https://github.com/llvm/llvm-project/commit/b1d47467e26142e6029e9ec7ca5c42645ffaa7bb.diff

LOG: [Hexagon] Change HVX vector predicate types from v512/1024i1 to v64/128i1

This commit removes the artificial types <512 x i1> and <1024 x i1>
from HVX intrinsics, and makes v512i1 and v1024i1 no longer legal on
Hexagon.

It may cause existing bitcode files to become invalid.
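For example (illustrative, 64-byte mode, mirroring the builtin signature
changes in the diff below), a vector compare intrinsic that was previously
declared as
    declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>)
is now declared as
    declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>)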

* Conversions between vector predicates and vector registers must be
  done explicitly, via the vandvrt/vandqrt instructions (i.e. their
  intrinsics), e.g. for 64-byte mode:
    %Q = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %V, i32 -1)
    %V = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %Q, i32 -1)

  The conversion intrinsics are:
    declare  <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
    declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
    declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
    declare <32 x i32> @llvm.hexagon.V6.vandqrt.128B(<128 x i1>, i32)
  They are all pure.

* Vector predicate values cannot be loaded/stored directly. This
  reflects the corresponding architecture restriction. Loading and
  storing of vector predicates must be done indirectly, through vector
  registers and explicit conversions via the vandvrt/vandqrt
  instructions.
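
  A minimal sketch of such an indirect spill/reload (64-byte mode;
  %addr is a hypothetical stack slot, not taken from this patch):
    %V  = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %Q, i32 -1)
    store <16 x i32> %V, <16 x i32>* %addr, align 64
    ; ... later, reload and convert back:
    %V2 = load <16 x i32>, <16 x i32>* %addr, align 64
    %Q2 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %V2, i32 -1)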

Added: 
    clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def

Modified: 
    clang/include/clang/Basic/BuiltinsHexagon.def
    clang/include/clang/Basic/BuiltinsHexagonDep.def
    clang/include/clang/module.modulemap
    clang/lib/Basic/Targets/Hexagon.h
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/builtins-hexagon-v66-128B.c
    clang/test/CodeGen/builtins-hexagon-v66.c
    clang/test/CodeGen/builtins-hvx128.c
    clang/test/CodeGen/builtins-hvx64.c
    llvm/include/llvm/IR/IntrinsicsHexagon.td
    llvm/include/llvm/IR/IntrinsicsHexagonDep.td
    llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
    llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
    llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
    llvm/lib/Target/Hexagon/HexagonIntrinsics.td
    llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
    llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
    llvm/lib/Target/Hexagon/HexagonSubtarget.h
    llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll
    llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll
    llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll
    llvm/test/CodeGen/Hexagon/early-if-vecpred.ll
    llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
    llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
    llvm/test/CodeGen/Hexagon/hvx-byte-store.ll
    llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll
    llvm/test/CodeGen/Hexagon/hvx-dual-output.ll
    llvm/test/CodeGen/Hexagon/inline-asm-qv.ll
    llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll
    llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll
    llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
    llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll
    llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
    llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
    llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
    llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
    llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
    llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
    llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
    llvm/test/CodeGen/Hexagon/late_instr.ll
    llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
    llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
    llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
    llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
    llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
    llvm/test/CodeGen/Hexagon/split-vecpred.ll
    llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
    llvm/test/CodeGen/Hexagon/swp-sigma.ll
    llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
    llvm/test/CodeGen/Hexagon/v6-spill1.ll
    llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
    llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
    llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
    llvm/test/CodeGen/Hexagon/v60-vsel1.ll
    llvm/test/CodeGen/Hexagon/v60-vsel2.ll
    llvm/test/CodeGen/Hexagon/v60Intrins.ll
    llvm/test/CodeGen/Hexagon/v60_sort16.ll
    llvm/test/CodeGen/Hexagon/v60small.ll
    llvm/test/CodeGen/Hexagon/v62-inlasm4.ll
    llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll
    llvm/test/CodeGen/Hexagon/v6vect-pred2.ll
    llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll
    llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
    llvm/test/CodeGen/Hexagon/vecPred2Vec.ll
    llvm/test/CodeGen/Hexagon/vect-downscale.ll
    llvm/test/CodeGen/Hexagon/vector-align.ll
    llvm/test/CodeGen/Hexagon/vselect-pseudo.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsHexagon.def b/clang/include/clang/Basic/BuiltinsHexagon.def
index 40fdbeffdf51..28aa222166f5 100644
--- a/clang/include/clang/Basic/BuiltinsHexagon.def
+++ b/clang/include/clang/Basic/BuiltinsHexagon.def
@@ -96,14 +96,14 @@ TARGET_BUILTIN(__builtin_HEXAGON_S2_storerd_pcr, "vv*iLLivC*", "", V5)
 TARGET_BUILTIN(__builtin_HEXAGON_prefetch,"vv*","", V5)
 TARGET_BUILTIN(__builtin_HEXAGON_A6_vminub_RdP,"LLiLLiLLi","", V62)
 
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq,"vV16iv*V16i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq,"vV16iv*V16i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq,"vV16iv*V16i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq,"vV16iv*V16i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq_128B,"vV32iv*V32i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq_128B,"vV32iv*V32i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq_128B,"vV32iv*V32i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq_128B,"vV32iv*V32i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq,"vV64bv*V16i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq,"vV64bv*V16i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq,"vV64bv*V16i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq,"vV64bv*V16i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq_128B,"vV128bv*V32i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq_128B,"vV128bv*V32i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq_128B,"vV128bv*V32i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq_128B,"vV128bv*V32i","", HVXV60)
 
 
 // These are only valid on v65

diff  --git a/clang/include/clang/Basic/BuiltinsHexagonDep.def b/clang/include/clang/Basic/BuiltinsHexagonDep.def
index 93fa373244d7..b694e4c35d3b 100644
--- a/clang/include/clang/Basic/BuiltinsHexagonDep.def
+++ b/clang/include/clang/Basic/BuiltinsHexagonDep.def
@@ -924,14 +924,14 @@ TARGET_BUILTIN(__builtin_HEXAGON_F2_dfmpyhh, "dddd", "", V67)
 
 // V60 HVX Instructions.
 
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai, "vV16iv*V16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai_128B, "vV32iv*V32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai, "vV16iv*V16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B, "vV32iv*V32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai, "vV16iv*V16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B, "vV32iv*V32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai, "vV16iv*V16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B, "vV32iv*V32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai, "vV64bv*V16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai_128B, "vV128bv*V32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai, "vV64bv*V16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B, "vV128bv*V32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai, "vV64bv*V16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B, "vV128bv*V32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai, "vV64bv*V16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B, "vV128bv*V32i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_valignb, "V16iV16iV16ii", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_valignb_128B, "V32iV32iV32ii", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vlalignb, "V16iV16iV16ii", "", HVXV60)
@@ -1212,30 +1212,30 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubuhw, "V32iV16iV16i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubuhw_128B, "V64iV32iV32i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vd0, "V16i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vd0_128B, "V32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq_128B, "V32iV32iV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq_128B, "V32iV128bV32iV32i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh, "V16iV16i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh_128B, "V32iV32i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh_sat, "V16iV16i", "", HVXV60)
@@ -1346,104 +1346,104 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vxor, "V16iV16iV16i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vxor_128B, "V32iV32iV32i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vnot, "V16iV16i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vnot_128B, "V32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt, "V16iV16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_128B, "V32iV32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc, "V16iV16iV16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc_128B, "V32iV32iV32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt, "V16iV16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_128B, "V32iV32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc, "V16iV16iV16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc_128B, "V32iV32iV32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not, "V16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not_128B, "V32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2, "V16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2_128B, "V32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap, "V32iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap_128B, "V64iV32iV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt, "V16iV64bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_128B, "V32iV128bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc, "V16iV16iV64bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc_128B, "V32iV32iV128bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt, "V64bV16ii", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_128B, "V128bV32ii", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc, "V64bV64bV16ii", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc_128B, "V128bV128bV32ii", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not, "V64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not_128B, "V128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2, "V64bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2_128B, "V128bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap, "V32iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap_128B, "V64iV128bV32iV32i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxub, "V16iV16iV16i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxub_128B, "V32iV32iV32i", "", HVXV60)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vminub, "V16iV16iV16i", "", HVXV60)
@@ -1585,20 +1585,20 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub, "V16iV16ii", "", HVXV62)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_128B, "V32iV32ii", "", HVXV62)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_acc, "V16iV16iV16ii", "", HVXV62)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_acc_128B, "V32iV32iV32ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt, "V16iV16ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_128B, "V32iV32ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc, "V16iV16iV16ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc_128B, "V32iV32iV32ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv, "V16iV16iV16i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv_128B, "V32iV32iV32i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv, "V16iV16iV16i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv_128B, "V32iV32iV32i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2, "V16ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2_128B, "V32ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw, "V16iV16iV16i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw_128B, "V32iV32iV32i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh, "V16iV16iV16i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh_128B, "V32iV32iV32i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt, "V16iV64bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_128B, "V32iV128bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc, "V16iV16iV64bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc_128B, "V32iV32iV128bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv, "V16iV64bV16i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv_128B, "V32iV128bV32i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv, "V16iV64bV16i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv_128B, "V32iV128bV32i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2, "V64bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2_128B, "V128bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw, "V64bV64bV64b", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw_128B, "V128bV128bV128b", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh, "V64bV64bV64b", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh_128B, "V128bV128bV128b", "", HVXV62)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxb, "V16iV16iV16i", "", HVXV62)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxb_128B, "V32iV32iV32i", "", HVXV62)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vminb, "V16iV16iV16i", "", HVXV62)
@@ -1678,12 +1678,12 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermh, "vv*iiV16i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermh_128B, "vv*iiV32i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhw, "vv*iiV32i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhw_128B, "vv*iiV64i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq, "vv*V16iiiV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq_128B, "vv*V32iiiV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq, "vv*V16iiiV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq_128B, "vv*V32iiiV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq, "vv*V16iiiV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq_128B, "vv*V32iiiV64i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq, "vv*V64biiV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq_128B, "vv*V128biiV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq, "vv*V64biiV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq_128B, "vv*V128biiV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq, "vv*V64biiV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq_128B, "vv*V128biiV64i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw, "viiV16iV16i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_128B, "viiV32iV32i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh, "viiV16iV16i", "", HVXV65)
@@ -1692,22 +1692,22 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_add, "viiV16iV16i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_add_128B, "viiV32iV32i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh_add, "viiV16iV16i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh_add_128B, "viiV32iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq, "vV16iiiV16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq_128B, "vV32iiiV32iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq, "vV16iiiV16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq_128B, "vV32iiiV32iV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq, "vV64biiV16iV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq_128B, "vV128biiV32iV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq, "vV64biiV16iV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq_128B, "vV128biiV32iV32i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw, "viiV32iV16i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_128B, "viiV64iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq, "vV16iiiV32iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq_128B, "vV32iiiV64iV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq, "vV64biiV32iV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq_128B, "vV128biiV64iV32i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_add, "viiV32iV16i", "", HVXV65)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_add_128B, "viiV64iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb, "V16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb_128B, "V32iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh, "V16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh_128B, "V32iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw, "V16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw_128B, "V32iV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb, "V16iV64b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb_128B, "V32iV128b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh, "V16iV64b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh_128B, "V32iV128b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw, "V16iV64b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw_128B, "V32iV128b", "", HVXV65)
 
 // V66 HVX Instructions.
 
@@ -1715,7 +1715,7 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vrotr, "V16iV16iV16i", "", HVXV66)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vrotr_128B, "V32iV32iV32i", "", HVXV66)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vasr_into, "V32iV32iV16iV16i", "", HVXV66)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vasr_into_128B, "V64iV64iV32iV32i", "", HVXV66)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat, "V16iV16iV16iV16i", "", HVXV66)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat_128B, "V32iV32iV32iV32i", "", HVXV66)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat, "V16iV16iV16iV64b", "", HVXV66)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat_128B, "V32iV32iV32iV128b", "", HVXV66)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vsatdw, "V16iV16iV16i", "", HVXV66)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vsatdw_128B, "V32iV32iV32i", "", HVXV66)

diff  --git a/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def b/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def
new file mode 100644
index 000000000000..9478a1b3fd14
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def
@@ -0,0 +1,206 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//===----------------------------------------------------------------------===//
+
+CUSTOM_BUILTIN_MAPPING(M2_mpysmi, 0)
+CUSTOM_BUILTIN_MAPPING(M2_dpmpyss_s0, 0)
+CUSTOM_BUILTIN_MAPPING(M2_dpmpyuu_s0, 0)
+CUSTOM_BUILTIN_MAPPING(M2_mpyi, 0)
+CUSTOM_BUILTIN_MAPPING(M2_mpyui, 0)
+CUSTOM_BUILTIN_MAPPING(A2_add, 0)
+CUSTOM_BUILTIN_MAPPING(A2_sub, 0)
+CUSTOM_BUILTIN_MAPPING(A2_addi, 0)
+CUSTOM_BUILTIN_MAPPING(A2_addp, 0)
+CUSTOM_BUILTIN_MAPPING(A2_subp, 0)
+CUSTOM_BUILTIN_MAPPING(A2_neg, 0)
+CUSTOM_BUILTIN_MAPPING(A2_zxtb, 0)
+CUSTOM_BUILTIN_MAPPING(A2_sxtb, 0)
+CUSTOM_BUILTIN_MAPPING(A2_zxth, 0)
+CUSTOM_BUILTIN_MAPPING(A2_sxth, 0)
+CUSTOM_BUILTIN_MAPPING(A2_and, 0)
+CUSTOM_BUILTIN_MAPPING(A2_or, 0)
+CUSTOM_BUILTIN_MAPPING(A2_xor, 0)
+CUSTOM_BUILTIN_MAPPING(A2_not, 0)
+CUSTOM_BUILTIN_MAPPING(A2_subri, 0)
+CUSTOM_BUILTIN_MAPPING(A2_andir, 0)
+CUSTOM_BUILTIN_MAPPING(A2_orir, 0)
+CUSTOM_BUILTIN_MAPPING(S2_asr_i_r, 0)
+CUSTOM_BUILTIN_MAPPING(S2_lsr_i_r, 0)
+CUSTOM_BUILTIN_MAPPING(S2_asl_i_r, 0)
+CUSTOM_BUILTIN_MAPPING(S2_asr_i_p, 0)
+CUSTOM_BUILTIN_MAPPING(S2_lsr_i_p, 0)
+CUSTOM_BUILTIN_MAPPING(S2_asl_i_p, 0)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddbq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddbq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubbq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubbq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddbnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddbnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubbnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubbnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddhq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddhq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubhq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubhq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddhnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddhnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubhnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubhnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddwnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddwnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubwnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubwnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandqrt, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandqrt_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandvrt, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandvrt_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgth, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqh, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqb, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_not, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_not_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_and_n, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_and_n_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_or_n, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_or_n_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vmux, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vmux_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vswap, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vswap_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddcarry, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddcarry_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubcarry, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubcarry_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandnqrt, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandvqv, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandvqv_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandvnqv, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandvnqv_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_shuffeqw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_shuffeqw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_shuffeqh, 64)
+CUSTOM_BUILTIN_MAPPING(V6_shuffeqh_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermhq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermhq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermhq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermhq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqb, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqb_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqh, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqh_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat_128B, 128)

diff  --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap
index f36fc6bd55a4..af1322acc289 100644
--- a/clang/include/clang/module.modulemap
+++ b/clang/include/clang/module.modulemap
@@ -39,6 +39,7 @@ module Clang_Basic {
   textual header "Basic/Builtins.def"
   textual header "Basic/BuiltinsHexagon.def"
   textual header "Basic/BuiltinsHexagonDep.def"
+  textual header "Basic/BuiltinsHexagonMapCustomDep.def"
   textual header "Basic/BuiltinsLe64.def"
   textual header "Basic/BuiltinsMips.def"
   textual header "Basic/BuiltinsNEON.def"

diff  --git a/clang/lib/Basic/Targets/Hexagon.h b/clang/lib/Basic/Targets/Hexagon.h
index 2a72825e3c5a..f58f594b104f 100644
--- a/clang/lib/Basic/Targets/Hexagon.h
+++ b/clang/lib/Basic/Targets/Hexagon.h
@@ -57,6 +57,12 @@ class LLVM_LIBRARY_VISIBILITY HexagonTargetInfo : public TargetInfo {
     LargeArrayAlign = 64;
     UseBitFieldTypeAlignment = true;
     ZeroLengthBitfieldBoundary = 32;
+
+    // These are the default values anyway, but explicitly make sure
+    // that the size of the boolean type is 8 bits. Bool vectors are used
+    // for modeling predicate registers in HVX, and the bool -> byte
+    // correspondence matches the HVX architecture.
+    BoolWidth = BoolAlign = 8;
   }
 
   ArrayRef<Builtin::Info> getTargetBuiltins() const override;

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 401c4d8e0539..bda01c6598a0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15007,10 +15007,46 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
   }
 }
 
+static std::pair<Intrinsic::ID, unsigned>
+getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) {
+  struct Info {
+    unsigned BuiltinID;
+    Intrinsic::ID IntrinsicID;
+    unsigned VecLen;
+  };
+  Info Infos[] = {
+#define CUSTOM_BUILTIN_MAPPING(x,s) \
+  { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
+    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
+    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
+    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
+    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
+    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
+    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
+    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
+    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
+#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
+#undef CUSTOM_BUILTIN_MAPPING
+  };
+
+  auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
+  static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
+  (void)SortOnce;
+
+  const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos),
+                                   Info{BuiltinID, 0, 0}, CmpInfo);
+  if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
+    return {Intrinsic::not_intrinsic, 0};
+
+  return {F->IntrinsicID, F->VecLen};
+}
+
 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
                                                const CallExpr *E) {
   SmallVector<llvm::Value *, 4> Ops;
-  Intrinsic::ID ID = Intrinsic::not_intrinsic;
+  Intrinsic::ID ID;
+  unsigned VecLen;
+  std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID);
 
   auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
     // The base pointer is passed by address, so it needs to be loaded.
@@ -15099,51 +15135,41 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateExtractValue(Result, 1);
   };
 
+  auto V2Q = [this, VecLen] (llvm::Value *Vec) {
+    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
+                                     : Intrinsic::hexagon_V6_vandvrt;
+    return Builder.CreateCall(CGM.getIntrinsic(ID),
+                              {Vec, Builder.getInt32(-1)});
+  };
+  auto Q2V = [this, VecLen] (llvm::Value *Pred) {
+    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
+                                     : Intrinsic::hexagon_V6_vandqrt;
+    return Builder.CreateCall(CGM.getIntrinsic(ID),
+                              {Pred, Builder.getInt32(-1)});
+  };
+
   switch (BuiltinID) {
+  // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
+  // and the corresponding C/C++ builtins use loads/stores to update
+  // the predicate.
   case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
-  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
-    Address Dest = EmitPointerWithAlignment(E->getArg(2));
-    unsigned Size;
-    if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
-      Size = 512;
-      ID = Intrinsic::hexagon_V6_vaddcarry;
-    } else {
-      Size = 1024;
-      ID = Intrinsic::hexagon_V6_vaddcarry_128B;
-    }
-    Dest = Builder.CreateBitCast(Dest,
-        llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
-    LoadInst *QLd = Builder.CreateLoad(Dest);
-    Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
-    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
-    llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
-    llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
-                                              Vprd->getType()->getPointerTo(0));
-    Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
-    return Builder.CreateExtractValue(Result, 0);
-  }
+  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
   case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
   case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
-    Address Dest = EmitPointerWithAlignment(E->getArg(2));
-    unsigned Size;
-    if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
-      Size = 512;
-      ID = Intrinsic::hexagon_V6_vsubcarry;
-    } else {
-      Size = 1024;
-      ID = Intrinsic::hexagon_V6_vsubcarry_128B;
-    }
-    Dest = Builder.CreateBitCast(Dest,
-        llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
-    LoadInst *QLd = Builder.CreateLoad(Dest);
-    Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
-    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
-    llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
-    llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
-                                              Vprd->getType()->getPointerTo(0));
-    Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
+    // Get the type from the 0-th argument.
+    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
+    Address PredAddr = Builder.CreateBitCast(
+        EmitPointerWithAlignment(E->getArg(2)), VecType->getPointerTo(0));
+    llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
+    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
+        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
+
+    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
+    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
+        PredAddr.getAlignment());
     return Builder.CreateExtractValue(Result, 0);
   }
+
   case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
     return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
   case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
@@ -15200,8 +15226,38 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
     return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
   case Hexagon::BI__builtin_brev_ldd:
     return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
-  default:
-    break;
+  default: {
+    if (ID == Intrinsic::not_intrinsic)
+      return nullptr;
+
+    auto IsVectorPredTy = [] (llvm::Type *T) {
+      return T->isVectorTy() && T->getVectorElementType()->isIntegerTy(1);
+    };
+
+    llvm::Function *IntrFn = CGM.getIntrinsic(ID);
+    llvm::FunctionType *IntrTy = IntrFn->getFunctionType();
+    SmallVector<llvm::Value*,4> Ops;
+    for (unsigned i = 0, e = IntrTy->getNumParams(); i != e; ++i) {
+      llvm::Type *T = IntrTy->getParamType(i);
+      const Expr *A = E->getArg(i);
+      if (IsVectorPredTy(T)) {
+        // There will be an implicit cast to a boolean vector. Strip it.
+        if (auto *Cast = dyn_cast<ImplicitCastExpr>(A)) {
+          if (Cast->getCastKind() == CK_BitCast)
+            A = Cast->getSubExpr();
+        }
+        Ops.push_back(V2Q(EmitScalarExpr(A)));
+      } else {
+        Ops.push_back(EmitScalarExpr(A));
+      }
+    }
+
+    llvm::Value *Call = Builder.CreateCall(IntrFn, Ops);
+    if (IsVectorPredTy(IntrTy->getReturnType()))
+      Call = Q2V(Call);
+
+    return Call;
+  } // default
   } // switch
 
   return nullptr;

diff  --git a/clang/test/CodeGen/builtins-hexagon-v66-128B.c b/clang/test/CodeGen/builtins-hexagon-v66-128B.c
index a1c4786cf24c..074728ec07ec 100644
--- a/clang/test/CodeGen/builtins-hexagon-v66-128B.c
+++ b/clang/test/CodeGen/builtins-hexagon-v66-128B.c
@@ -9,7 +9,7 @@ typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256)))
   __attribute__((aligned(256)));
 
 // CHECK-LABEL: @test1
-// CHECK: call <32 x i32> @llvm.hexagon.V6.vaddcarrysat.128B(<32 x i32> %{{[0-9]+}}, <32 x i32> %{{[0-9]+}}, <1024 x i1> %{{[0-9]+}})
+// CHECK: call <32 x i32> @llvm.hexagon.V6.vaddcarrysat.128B(<32 x i32> %{{[0-9]+}}, <32 x i32> %{{[0-9]+}}, <128 x i1> %{{[0-9]+}})
 HEXAGON_Vect1024 test1(void *in, void *out) {
   HEXAGON_Vect1024 v1, v2;
   HEXAGON_Vect1024 *p;

diff  --git a/clang/test/CodeGen/builtins-hexagon-v66.c b/clang/test/CodeGen/builtins-hexagon-v66.c
index 1382f18b4faf..767f9faf7702 100644
--- a/clang/test/CodeGen/builtins-hexagon-v66.c
+++ b/clang/test/CodeGen/builtins-hexagon-v66.c
@@ -33,7 +33,7 @@ typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128)))
   __attribute__((aligned(128)));
 
 // CHECK-LABEL: @test5
-// CHECK: call <16 x i32> @llvm.hexagon.V6.vaddcarrysat(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <512 x i1> %{{[0-9]+}})
+// CHECK: call <16 x i32> @llvm.hexagon.V6.vaddcarrysat(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <64 x i1> %{{[0-9]+}})
 HEXAGON_Vect512 test5(void *in, void *out) {
   HEXAGON_Vect512 v1, v2;
   HEXAGON_Vect512 *p;

diff  --git a/clang/test/CodeGen/builtins-hvx128.c b/clang/test/CodeGen/builtins-hvx128.c
index 07d0e050ddc0..d61afdefc2ae 100644
--- a/clang/test/CodeGen/builtins-hvx128.c
+++ b/clang/test/CodeGen/builtins-hvx128.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -target-feature +hvx-length128b -emit-llvm %s -o - | FileCheck %s
 
 void test() {
+  int q128 __attribute__((__vector_size__(128)));
   int v128 __attribute__((__vector_size__(128)));
   int v256 __attribute__((__vector_size__(256)));
 
@@ -18,33 +19,33 @@ void test() {
   // CHECK: @llvm.hexagon.V6.lvsplatw.128B
   __builtin_HEXAGON_V6_lvsplatw_128B(0);
   // CHECK: @llvm.hexagon.V6.pred.and.128B
-  __builtin_HEXAGON_V6_pred_and_128B(v128, v128);
+  __builtin_HEXAGON_V6_pred_and_128B(q128, q128);
   // CHECK: @llvm.hexagon.V6.pred.and.n.128B
-  __builtin_HEXAGON_V6_pred_and_n_128B(v128, v128);
+  __builtin_HEXAGON_V6_pred_and_n_128B(q128, q128);
   // CHECK: @llvm.hexagon.V6.pred.not.128B
-  __builtin_HEXAGON_V6_pred_not_128B(v128);
+  __builtin_HEXAGON_V6_pred_not_128B(q128);
   // CHECK: @llvm.hexagon.V6.pred.or.128B
-  __builtin_HEXAGON_V6_pred_or_128B(v128, v128);
+  __builtin_HEXAGON_V6_pred_or_128B(q128, q128);
   // CHECK: @llvm.hexagon.V6.pred.or.n.128B
-  __builtin_HEXAGON_V6_pred_or_n_128B(v128, v128);
+  __builtin_HEXAGON_V6_pred_or_n_128B(q128, q128);
   // CHECK: @llvm.hexagon.V6.pred.scalar2.128B
   __builtin_HEXAGON_V6_pred_scalar2_128B(0);
   // CHECK: @llvm.hexagon.V6.pred.scalar2v2.128B
   __builtin_HEXAGON_V6_pred_scalar2v2_128B(0);
   // CHECK: @llvm.hexagon.V6.pred.xor.128B
-  __builtin_HEXAGON_V6_pred_xor_128B(v128, v128);
+  __builtin_HEXAGON_V6_pred_xor_128B(q128, q128);
   // CHECK: @llvm.hexagon.V6.shuffeqh.128B
-  __builtin_HEXAGON_V6_shuffeqh_128B(v128, v128);
+  __builtin_HEXAGON_V6_shuffeqh_128B(q128, q128);
   // CHECK: @llvm.hexagon.V6.shuffeqw.128B
-  __builtin_HEXAGON_V6_shuffeqw_128B(v128, v128);
+  __builtin_HEXAGON_V6_shuffeqw_128B(q128, q128);
   // CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai.128B
-  __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(v128, 0, v128);
+  __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(q128, 0, v128);
   // CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B
-  __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(v128, 0, v128);
+  __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(q128, 0, v128);
   // CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B
-  __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(v128, 0, v128);
+  __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(q128, 0, v128);
   // CHECK: @llvm.hexagon.V6.vS32b.qpred.ai.128B
-  __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(v128, 0, v128);
+  __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(q128, 0, v128);
   // CHECK: @llvm.hexagon.V6.vabsb.128B
   __builtin_HEXAGON_V6_vabsb_128B(v128);
   // CHECK: @llvm.hexagon.V6.vabsb.sat.128B
@@ -70,9 +71,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vaddb.dv.128B
   __builtin_HEXAGON_V6_vaddb_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vaddbnq.128B
-  __builtin_HEXAGON_V6_vaddbnq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vaddbnq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddbq.128B
-  __builtin_HEXAGON_V6_vaddbq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vaddbq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddbsat.128B
   __builtin_HEXAGON_V6_vaddbsat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddbsat.dv.128B
@@ -88,9 +89,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vaddh.dv.128B
   __builtin_HEXAGON_V6_vaddh_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vaddhnq.128B
-  __builtin_HEXAGON_V6_vaddhnq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vaddhnq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddhq.128B
-  __builtin_HEXAGON_V6_vaddhq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vaddhq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddhsat.128B
   __builtin_HEXAGON_V6_vaddhsat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddhsat.dv.128B
@@ -126,9 +127,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vaddw.dv.128B
   __builtin_HEXAGON_V6_vaddw_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vaddwnq.128B
-  __builtin_HEXAGON_V6_vaddwnq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vaddwnq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddwq.128B
-  __builtin_HEXAGON_V6_vaddwq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vaddwq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddwsat.128B
   __builtin_HEXAGON_V6_vaddwsat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddwsat.dv.128B
@@ -140,21 +141,21 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vand.128B
   __builtin_HEXAGON_V6_vand_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vandnqrt.128B
-  __builtin_HEXAGON_V6_vandnqrt_128B(v128, 0);
+  __builtin_HEXAGON_V6_vandnqrt_128B(q128, 0);
   // CHECK: @llvm.hexagon.V6.vandnqrt.acc.128B
-  __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, v128, 0);
+  __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, q128, 0);
   // CHECK: @llvm.hexagon.V6.vandqrt.128B
-  __builtin_HEXAGON_V6_vandqrt_128B(v128, 0);
+  __builtin_HEXAGON_V6_vandqrt_128B(q128, 0);
   // CHECK: @llvm.hexagon.V6.vandqrt.acc.128B
-  __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, v128, 0);
+  __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, q128, 0);
   // CHECK: @llvm.hexagon.V6.vandvnqv.128B
-  __builtin_HEXAGON_V6_vandvnqv_128B(v128, v128);
+  __builtin_HEXAGON_V6_vandvnqv_128B(q128, v128);
   // CHECK: @llvm.hexagon.V6.vandvqv.128B
-  __builtin_HEXAGON_V6_vandvqv_128B(v128, v128);
+  __builtin_HEXAGON_V6_vandvqv_128B(q128, v128);
   // CHECK: @llvm.hexagon.V6.vandvrt.128B
   __builtin_HEXAGON_V6_vandvrt_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vandvrt.acc.128B
-  __builtin_HEXAGON_V6_vandvrt_acc_128B(v128, v128, 0);
+  __builtin_HEXAGON_V6_vandvrt_acc_128B(q128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vaslh.128B
   __builtin_HEXAGON_V6_vaslh_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vaslh.acc.128B
@@ -296,87 +297,87 @@ void test() {
   // CHECK: @llvm.hexagon.V6.veqb.128B
   __builtin_HEXAGON_V6_veqb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.veqb.and.128B
-  __builtin_HEXAGON_V6_veqb_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqb_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqb.or.128B
-  __builtin_HEXAGON_V6_veqb_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqb_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqb.xor.128B
-  __builtin_HEXAGON_V6_veqb_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqb_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqh.128B
   __builtin_HEXAGON_V6_veqh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.veqh.and.128B
-  __builtin_HEXAGON_V6_veqh_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqh_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqh.or.128B
-  __builtin_HEXAGON_V6_veqh_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqh_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqh.xor.128B
-  __builtin_HEXAGON_V6_veqh_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqh_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqw.128B
   __builtin_HEXAGON_V6_veqw_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.veqw.and.128B
-  __builtin_HEXAGON_V6_veqw_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqw_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqw.or.128B
-  __builtin_HEXAGON_V6_veqw_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqw_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.veqw.xor.128B
-  __builtin_HEXAGON_V6_veqw_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_veqw_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgathermh.128B
   __builtin_HEXAGON_V6_vgathermh_128B(0, 0, 0, v128);
   // CHECK: @llvm.hexagon.V6.vgathermhq.128B
-  __builtin_HEXAGON_V6_vgathermhq_128B(0, v128, 0, 0, v128);
+  __builtin_HEXAGON_V6_vgathermhq_128B(0, q128, 0, 0, v128);
   // CHECK: @llvm.hexagon.V6.vgathermhw.128B
   __builtin_HEXAGON_V6_vgathermhw_128B(0, 0, 0, v256);
   // CHECK: @llvm.hexagon.V6.vgathermhwq.128B
-  __builtin_HEXAGON_V6_vgathermhwq_128B(0, v128, 0, 0, v256);
+  __builtin_HEXAGON_V6_vgathermhwq_128B(0, q128, 0, 0, v256);
   // CHECK: @llvm.hexagon.V6.vgathermw.128B
   __builtin_HEXAGON_V6_vgathermw_128B(0, 0, 0, v128);
   // CHECK: @llvm.hexagon.V6.vgathermwq.128B
-  __builtin_HEXAGON_V6_vgathermwq_128B(0, v128, 0, 0, v128);
+  __builtin_HEXAGON_V6_vgathermwq_128B(0, q128, 0, 0, v128);
   // CHECK: @llvm.hexagon.V6.vgtb.128B
   __builtin_HEXAGON_V6_vgtb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtb.and.128B
-  __builtin_HEXAGON_V6_vgtb_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtb_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtb.or.128B
-  __builtin_HEXAGON_V6_vgtb_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtb_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtb.xor.128B
-  __builtin_HEXAGON_V6_vgtb_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtb_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgth.128B
   __builtin_HEXAGON_V6_vgth_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vgth.and.128B
-  __builtin_HEXAGON_V6_vgth_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgth_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgth.or.128B
-  __builtin_HEXAGON_V6_vgth_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgth_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgth.xor.128B
-  __builtin_HEXAGON_V6_vgth_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgth_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtub.128B
   __builtin_HEXAGON_V6_vgtub_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtub.and.128B
-  __builtin_HEXAGON_V6_vgtub_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtub_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtub.or.128B
-  __builtin_HEXAGON_V6_vgtub_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtub_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtub.xor.128B
-  __builtin_HEXAGON_V6_vgtub_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtub_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuh.128B
   __builtin_HEXAGON_V6_vgtuh_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuh.and.128B
-  __builtin_HEXAGON_V6_vgtuh_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtuh_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuh.or.128B
-  __builtin_HEXAGON_V6_vgtuh_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtuh_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuh.xor.128B
-  __builtin_HEXAGON_V6_vgtuh_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtuh_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuw.128B
   __builtin_HEXAGON_V6_vgtuw_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuw.and.128B
-  __builtin_HEXAGON_V6_vgtuw_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtuw_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuw.or.128B
-  __builtin_HEXAGON_V6_vgtuw_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtuw_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtuw.xor.128B
-  __builtin_HEXAGON_V6_vgtuw_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtuw_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtw.128B
   __builtin_HEXAGON_V6_vgtw_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtw.and.128B
-  __builtin_HEXAGON_V6_vgtw_and_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtw_and_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtw.or.128B
-  __builtin_HEXAGON_V6_vgtw_or_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtw_or_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vgtw.xor.128B
-  __builtin_HEXAGON_V6_vgtw_xor_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vgtw_xor_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vinsertwr.128B
   __builtin_HEXAGON_V6_vinsertwr_128B(v128, 0);
   // CHECK: @llvm.hexagon.V6.vlalignb.128B
@@ -416,13 +417,13 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vlutvwhi.128B
   __builtin_HEXAGON_V6_vlutvwhi_128B(v128, v128, 0);
   // CHECK: @llvm.hexagon.V6.vmaskedstorenq.128B
-  __builtin_HEXAGON_V6_vmaskedstorenq_128B(v128, 0, v128);
+  __builtin_HEXAGON_V6_vmaskedstorenq_128B(q128, 0, v128);
   // CHECK: @llvm.hexagon.V6.vmaskedstorentnq.128B
-  __builtin_HEXAGON_V6_vmaskedstorentnq_128B(v128, 0, v128);
+  __builtin_HEXAGON_V6_vmaskedstorentnq_128B(q128, 0, v128);
   // CHECK: @llvm.hexagon.V6.vmaskedstorentq.128B
-  __builtin_HEXAGON_V6_vmaskedstorentq_128B(v128, 0, v128);
+  __builtin_HEXAGON_V6_vmaskedstorentq_128B(q128, 0, v128);
   // CHECK: @llvm.hexagon.V6.vmaskedstoreq.128B
-  __builtin_HEXAGON_V6_vmaskedstoreq_128B(v128, 0, v128);
+  __builtin_HEXAGON_V6_vmaskedstoreq_128B(q128, 0, v128);
   // CHECK: @llvm.hexagon.V6.vmaxb.128B
   __builtin_HEXAGON_V6_vmaxb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vmaxh.128B
@@ -566,7 +567,7 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vmpyuhv.acc.128B
   __builtin_HEXAGON_V6_vmpyuhv_acc_128B(v256, v128, v128);
   // CHECK: @llvm.hexagon.V6.vmux.128B
-  __builtin_HEXAGON_V6_vmux_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vmux_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vnavgb.128B
   __builtin_HEXAGON_V6_vnavgb_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vnavgh.128B
@@ -602,11 +603,11 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vpopcounth.128B
   __builtin_HEXAGON_V6_vpopcounth_128B(v128);
   // CHECK: @llvm.hexagon.V6.vprefixqb.128B
-  __builtin_HEXAGON_V6_vprefixqb_128B(v128);
+  __builtin_HEXAGON_V6_vprefixqb_128B(q128);
   // CHECK: @llvm.hexagon.V6.vprefixqh.128B
-  __builtin_HEXAGON_V6_vprefixqh_128B(v128);
+  __builtin_HEXAGON_V6_vprefixqh_128B(q128);
   // CHECK: @llvm.hexagon.V6.vprefixqw.128B
-  __builtin_HEXAGON_V6_vprefixqw_128B(v128);
+  __builtin_HEXAGON_V6_vprefixqw_128B(q128);
   // CHECK: @llvm.hexagon.V6.vrdelta.128B
   __builtin_HEXAGON_V6_vrdelta_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vrmpybub.rtt.128B
@@ -676,19 +677,19 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vscattermh.add.128B
   __builtin_HEXAGON_V6_vscattermh_add_128B(0, 0, v128, v128);
   // CHECK: @llvm.hexagon.V6.vscattermhq.128B
-  __builtin_HEXAGON_V6_vscattermhq_128B(v128, 0, 0, v128, v128);
+  __builtin_HEXAGON_V6_vscattermhq_128B(q128, 0, 0, v128, v128);
   // CHECK: @llvm.hexagon.V6.vscattermhw.128B
   __builtin_HEXAGON_V6_vscattermhw_128B(0, 0, v256, v128);
   // CHECK: @llvm.hexagon.V6.vscattermhw.add.128B
   __builtin_HEXAGON_V6_vscattermhw_add_128B(0, 0, v256, v128);
   // CHECK: @llvm.hexagon.V6.vscattermhwq.128B
-  __builtin_HEXAGON_V6_vscattermhwq_128B(v128, 0, 0, v256, v128);
+  __builtin_HEXAGON_V6_vscattermhwq_128B(q128, 0, 0, v256, v128);
   // CHECK: @llvm.hexagon.V6.vscattermw.128B
   __builtin_HEXAGON_V6_vscattermw_128B(0, 0, v128, v128);
   // CHECK: @llvm.hexagon.V6.vscattermw.add.128B
   __builtin_HEXAGON_V6_vscattermw_add_128B(0, 0, v128, v128);
   // CHECK: @llvm.hexagon.V6.vscattermwq.128B
-  __builtin_HEXAGON_V6_vscattermwq_128B(v128, 0, 0, v128, v128);
+  __builtin_HEXAGON_V6_vscattermwq_128B(q128, 0, 0, v128, v128);
   // CHECK: @llvm.hexagon.V6.vsh.128B
   __builtin_HEXAGON_V6_vsh_128B(v128);
   // CHECK: @llvm.hexagon.V6.vshufeh.128B
@@ -714,9 +715,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vsubb.dv.128B
   __builtin_HEXAGON_V6_vsubb_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vsubbnq.128B
-  __builtin_HEXAGON_V6_vsubbnq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vsubbnq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubbq.128B
-  __builtin_HEXAGON_V6_vsubbq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vsubbq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubbsat.128B
   __builtin_HEXAGON_V6_vsubbsat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubbsat.dv.128B
@@ -728,9 +729,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vsubh.dv.128B
   __builtin_HEXAGON_V6_vsubh_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vsubhnq.128B
-  __builtin_HEXAGON_V6_vsubhnq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vsubhnq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubhq.128B
-  __builtin_HEXAGON_V6_vsubhq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vsubhq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubhsat.128B
   __builtin_HEXAGON_V6_vsubhsat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubhsat.dv.128B
@@ -760,15 +761,15 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vsubw.dv.128B
   __builtin_HEXAGON_V6_vsubw_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vsubwnq.128B
-  __builtin_HEXAGON_V6_vsubwnq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vsubwnq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubwq.128B
-  __builtin_HEXAGON_V6_vsubwq_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vsubwq_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubwsat.128B
   __builtin_HEXAGON_V6_vsubwsat_128B(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubwsat.dv.128B
   __builtin_HEXAGON_V6_vsubwsat_dv_128B(v256, v256);
   // CHECK: @llvm.hexagon.V6.vswap.128B
-  __builtin_HEXAGON_V6_vswap_128B(v128, v128, v128);
+  __builtin_HEXAGON_V6_vswap_128B(q128, v128, v128);
   // CHECK: @llvm.hexagon.V6.vtmpyb.128B
   __builtin_HEXAGON_V6_vtmpyb_128B(v256, 0);
   // CHECK: @llvm.hexagon.V6.vtmpyb.acc.128B

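(Sketch, not in the patch.) Outside of a type-checking test like the one above, the predicated-store builtins are used the same way: the predicate is still held in a plain 128-byte vector, and CodeGen converts it for the intrinsic call. Assuming a void* address parameter and illustrative names:

    typedef int HVX_Vector     __attribute__((__vector_size__(128)));
    typedef int HVX_VectorPred __attribute__((__vector_size__(128)));

    /* Store v to addr under the vector predicate q; q is lowered to <128 x i1>
       for the call to @llvm.hexagon.V6.vS32b.qpred.ai.128B. */
    void store_under_pred(HVX_VectorPred q, void *addr, HVX_Vector v) {
      __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(q, addr, v);
    }
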
diff --git a/clang/test/CodeGen/builtins-hvx64.c b/clang/test/CodeGen/builtins-hvx64.c
index 5a53296e7276..27d39990adb5 100644
--- a/clang/test/CodeGen/builtins-hvx64.c
+++ b/clang/test/CodeGen/builtins-hvx64.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -target-feature +hvx-length64b -emit-llvm %s -o - | FileCheck %s
 
 void test() {
+  int q64 __attribute__((__vector_size__(64)));
   int v64 __attribute__((__vector_size__(64)));
   int v128 __attribute__((__vector_size__(128)));
 
@@ -18,33 +19,33 @@ void test() {
   // CHECK: @llvm.hexagon.V6.lvsplatw
   __builtin_HEXAGON_V6_lvsplatw(0);
   // CHECK: @llvm.hexagon.V6.pred.and
-  __builtin_HEXAGON_V6_pred_and(v64, v64);
+  __builtin_HEXAGON_V6_pred_and(q64, q64);
   // CHECK: @llvm.hexagon.V6.pred.and.n
-  __builtin_HEXAGON_V6_pred_and_n(v64, v64);
+  __builtin_HEXAGON_V6_pred_and_n(q64, q64);
   // CHECK: @llvm.hexagon.V6.pred.not
-  __builtin_HEXAGON_V6_pred_not(v64);
+  __builtin_HEXAGON_V6_pred_not(q64);
   // CHECK: @llvm.hexagon.V6.pred.or
-  __builtin_HEXAGON_V6_pred_or(v64, v64);
+  __builtin_HEXAGON_V6_pred_or(q64, q64);
   // CHECK: @llvm.hexagon.V6.pred.or.n
-  __builtin_HEXAGON_V6_pred_or_n(v64, v64);
+  __builtin_HEXAGON_V6_pred_or_n(q64, q64);
   // CHECK: @llvm.hexagon.V6.pred.scalar2
   __builtin_HEXAGON_V6_pred_scalar2(0);
   // CHECK: @llvm.hexagon.V6.pred.scalar2v2
   __builtin_HEXAGON_V6_pred_scalar2v2(0);
   // CHECK: @llvm.hexagon.V6.pred.xor
-  __builtin_HEXAGON_V6_pred_xor(v64, v64);
+  __builtin_HEXAGON_V6_pred_xor(q64, q64);
   // CHECK: @llvm.hexagon.V6.shuffeqh
-  __builtin_HEXAGON_V6_shuffeqh(v64, v64);
+  __builtin_HEXAGON_V6_shuffeqh(q64, q64);
   // CHECK: @llvm.hexagon.V6.shuffeqw
-  __builtin_HEXAGON_V6_shuffeqw(v64, v64);
+  __builtin_HEXAGON_V6_shuffeqw(q64, q64);
   // CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai
-  __builtin_HEXAGON_V6_vS32b_nqpred_ai(v64, 0, v64);
+  __builtin_HEXAGON_V6_vS32b_nqpred_ai(q64, 0, v64);
   // CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai
-  __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(v64, 0, v64);
+  __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(q64, 0, v64);
   // CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai
-  __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(v64, 0, v64);
+  __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(q64, 0, v64);
   // CHECK: @llvm.hexagon.V6.vS32b.qpred.ai
-  __builtin_HEXAGON_V6_vS32b_qpred_ai(v64, 0, v64);
+  __builtin_HEXAGON_V6_vS32b_qpred_ai(q64, 0, v64);
   // CHECK: @llvm.hexagon.V6.vabsb
   __builtin_HEXAGON_V6_vabsb(v64);
   // CHECK: @llvm.hexagon.V6.vabsb.sat
@@ -70,9 +71,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vaddb.dv
   __builtin_HEXAGON_V6_vaddb_dv(v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddbnq
-  __builtin_HEXAGON_V6_vaddbnq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vaddbnq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddbq
-  __builtin_HEXAGON_V6_vaddbq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vaddbq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddbsat
   __builtin_HEXAGON_V6_vaddbsat(v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddbsat.dv
@@ -88,9 +89,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vaddh.dv
   __builtin_HEXAGON_V6_vaddh_dv(v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddhnq
-  __builtin_HEXAGON_V6_vaddhnq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vaddhnq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddhq
-  __builtin_HEXAGON_V6_vaddhq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vaddhq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddhsat
   __builtin_HEXAGON_V6_vaddhsat(v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddhsat.dv
@@ -126,9 +127,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vaddw.dv
   __builtin_HEXAGON_V6_vaddw_dv(v128, v128);
   // CHECK: @llvm.hexagon.V6.vaddwnq
-  __builtin_HEXAGON_V6_vaddwnq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vaddwnq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddwq
-  __builtin_HEXAGON_V6_vaddwq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vaddwq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddwsat
   __builtin_HEXAGON_V6_vaddwsat(v64, v64);
   // CHECK: @llvm.hexagon.V6.vaddwsat.dv
@@ -140,21 +141,21 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vand
   __builtin_HEXAGON_V6_vand(v64, v64);
   // CHECK: @llvm.hexagon.V6.vandnqrt
-  __builtin_HEXAGON_V6_vandnqrt(v64, 0);
+  __builtin_HEXAGON_V6_vandnqrt(q64, 0);
   // CHECK: @llvm.hexagon.V6.vandnqrt.acc
-  __builtin_HEXAGON_V6_vandnqrt_acc(v64, v64, 0);
+  __builtin_HEXAGON_V6_vandnqrt_acc(v64, q64, 0);
   // CHECK: @llvm.hexagon.V6.vandqrt
-  __builtin_HEXAGON_V6_vandqrt(v64, 0);
+  __builtin_HEXAGON_V6_vandqrt(q64, 0);
   // CHECK: @llvm.hexagon.V6.vandqrt.acc
-  __builtin_HEXAGON_V6_vandqrt_acc(v64, v64, 0);
+  __builtin_HEXAGON_V6_vandqrt_acc(v64, q64, 0);
   // CHECK: @llvm.hexagon.V6.vandvnqv
-  __builtin_HEXAGON_V6_vandvnqv(v64, v64);
+  __builtin_HEXAGON_V6_vandvnqv(q64, v64);
   // CHECK: @llvm.hexagon.V6.vandvqv
-  __builtin_HEXAGON_V6_vandvqv(v64, v64);
+  __builtin_HEXAGON_V6_vandvqv(q64, v64);
   // CHECK: @llvm.hexagon.V6.vandvrt
   __builtin_HEXAGON_V6_vandvrt(v64, 0);
   // CHECK: @llvm.hexagon.V6.vandvrt.acc
-  __builtin_HEXAGON_V6_vandvrt_acc(v64, v64, 0);
+  __builtin_HEXAGON_V6_vandvrt_acc(q64, v64, 0);
   // CHECK: @llvm.hexagon.V6.vaslh
   __builtin_HEXAGON_V6_vaslh(v64, 0);
   // CHECK: @llvm.hexagon.V6.vaslh.acc
@@ -296,87 +297,87 @@ void test() {
   // CHECK: @llvm.hexagon.V6.veqb
   __builtin_HEXAGON_V6_veqb(v64, v64);
   // CHECK: @llvm.hexagon.V6.veqb.and
-  __builtin_HEXAGON_V6_veqb_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqb_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.veqb.or
-  __builtin_HEXAGON_V6_veqb_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqb_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.veqb.xor
-  __builtin_HEXAGON_V6_veqb_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqb_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.veqh
   __builtin_HEXAGON_V6_veqh(v64, v64);
   // CHECK: @llvm.hexagon.V6.veqh.and
-  __builtin_HEXAGON_V6_veqh_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqh_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.veqh.or
-  __builtin_HEXAGON_V6_veqh_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqh_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.veqh.xor
-  __builtin_HEXAGON_V6_veqh_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqh_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.veqw
   __builtin_HEXAGON_V6_veqw(v64, v64);
   // CHECK: @llvm.hexagon.V6.veqw.and
-  __builtin_HEXAGON_V6_veqw_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqw_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.veqw.or
-  __builtin_HEXAGON_V6_veqw_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqw_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.veqw.xor
-  __builtin_HEXAGON_V6_veqw_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_veqw_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgathermh
   __builtin_HEXAGON_V6_vgathermh(0, 0, 0, v64);
   // CHECK: @llvm.hexagon.V6.vgathermhq
-  __builtin_HEXAGON_V6_vgathermhq(0, v64, 0, 0, v64);
+  __builtin_HEXAGON_V6_vgathermhq(0, q64, 0, 0, v64);
   // CHECK: @llvm.hexagon.V6.vgathermhw
   __builtin_HEXAGON_V6_vgathermhw(0, 0, 0, v128);
   // CHECK: @llvm.hexagon.V6.vgathermhwq
-  __builtin_HEXAGON_V6_vgathermhwq(0, v64, 0, 0, v128);
+  __builtin_HEXAGON_V6_vgathermhwq(0, q64, 0, 0, v128);
   // CHECK: @llvm.hexagon.V6.vgathermw
   __builtin_HEXAGON_V6_vgathermw(0, 0, 0, v64);
   // CHECK: @llvm.hexagon.V6.vgathermwq
-  __builtin_HEXAGON_V6_vgathermwq(0, v64, 0, 0, v64);
+  __builtin_HEXAGON_V6_vgathermwq(0, q64, 0, 0, v64);
   // CHECK: @llvm.hexagon.V6.vgtb
   __builtin_HEXAGON_V6_vgtb(v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtb.and
-  __builtin_HEXAGON_V6_vgtb_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtb_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtb.or
-  __builtin_HEXAGON_V6_vgtb_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtb_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtb.xor
-  __builtin_HEXAGON_V6_vgtb_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtb_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgth
   __builtin_HEXAGON_V6_vgth(v64, v64);
   // CHECK: @llvm.hexagon.V6.vgth.and
-  __builtin_HEXAGON_V6_vgth_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgth_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgth.or
-  __builtin_HEXAGON_V6_vgth_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgth_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgth.xor
-  __builtin_HEXAGON_V6_vgth_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgth_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtub
   __builtin_HEXAGON_V6_vgtub(v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtub.and
-  __builtin_HEXAGON_V6_vgtub_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtub_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtub.or
-  __builtin_HEXAGON_V6_vgtub_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtub_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtub.xor
-  __builtin_HEXAGON_V6_vgtub_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtub_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtuh
   __builtin_HEXAGON_V6_vgtuh(v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtuh.and
-  __builtin_HEXAGON_V6_vgtuh_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtuh_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtuh.or
-  __builtin_HEXAGON_V6_vgtuh_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtuh_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtuh.xor
-  __builtin_HEXAGON_V6_vgtuh_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtuh_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtuw
   __builtin_HEXAGON_V6_vgtuw(v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtuw.and
-  __builtin_HEXAGON_V6_vgtuw_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtuw_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtuw.or
-  __builtin_HEXAGON_V6_vgtuw_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtuw_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtuw.xor
-  __builtin_HEXAGON_V6_vgtuw_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtuw_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtw
   __builtin_HEXAGON_V6_vgtw(v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtw.and
-  __builtin_HEXAGON_V6_vgtw_and(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtw_and(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtw.or
-  __builtin_HEXAGON_V6_vgtw_or(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtw_or(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vgtw.xor
-  __builtin_HEXAGON_V6_vgtw_xor(v64, v64, v64);
+  __builtin_HEXAGON_V6_vgtw_xor(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vinsertwr
   __builtin_HEXAGON_V6_vinsertwr(v64, 0);
   // CHECK: @llvm.hexagon.V6.vlalignb
@@ -416,13 +417,13 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vlutvwhi
   __builtin_HEXAGON_V6_vlutvwhi(v64, v64, 0);
   // CHECK: @llvm.hexagon.V6.vmaskedstorenq
-  __builtin_HEXAGON_V6_vmaskedstorenq(v64, 0, v64);
+  __builtin_HEXAGON_V6_vmaskedstorenq(q64, 0, v64);
   // CHECK: @llvm.hexagon.V6.vmaskedstorentnq
-  __builtin_HEXAGON_V6_vmaskedstorentnq(v64, 0, v64);
+  __builtin_HEXAGON_V6_vmaskedstorentnq(q64, 0, v64);
   // CHECK: @llvm.hexagon.V6.vmaskedstorentq
-  __builtin_HEXAGON_V6_vmaskedstorentq(v64, 0, v64);
+  __builtin_HEXAGON_V6_vmaskedstorentq(q64, 0, v64);
   // CHECK: @llvm.hexagon.V6.vmaskedstoreq
-  __builtin_HEXAGON_V6_vmaskedstoreq(v64, 0, v64);
+  __builtin_HEXAGON_V6_vmaskedstoreq(q64, 0, v64);
   // CHECK: @llvm.hexagon.V6.vmaxb
   __builtin_HEXAGON_V6_vmaxb(v64, v64);
   // CHECK: @llvm.hexagon.V6.vmaxh
@@ -566,7 +567,7 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vmpyuhv.acc
   __builtin_HEXAGON_V6_vmpyuhv_acc(v128, v64, v64);
   // CHECK: @llvm.hexagon.V6.vmux
-  __builtin_HEXAGON_V6_vmux(v64, v64, v64);
+  __builtin_HEXAGON_V6_vmux(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vnavgb
   __builtin_HEXAGON_V6_vnavgb(v64, v64);
   // CHECK: @llvm.hexagon.V6.vnavgh
@@ -602,11 +603,11 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vpopcounth
   __builtin_HEXAGON_V6_vpopcounth(v64);
   // CHECK: @llvm.hexagon.V6.vprefixqb
-  __builtin_HEXAGON_V6_vprefixqb(v64);
+  __builtin_HEXAGON_V6_vprefixqb(q64);
   // CHECK: @llvm.hexagon.V6.vprefixqh
-  __builtin_HEXAGON_V6_vprefixqh(v64);
+  __builtin_HEXAGON_V6_vprefixqh(q64);
   // CHECK: @llvm.hexagon.V6.vprefixqw
-  __builtin_HEXAGON_V6_vprefixqw(v64);
+  __builtin_HEXAGON_V6_vprefixqw(q64);
   // CHECK: @llvm.hexagon.V6.vrdelta
   __builtin_HEXAGON_V6_vrdelta(v64, v64);
   // CHECK: @llvm.hexagon.V6.vrmpybub.rtt
@@ -676,19 +677,19 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vscattermh.add
   __builtin_HEXAGON_V6_vscattermh_add(0, 0, v64, v64);
   // CHECK: @llvm.hexagon.V6.vscattermhq
-  __builtin_HEXAGON_V6_vscattermhq(v64, 0, 0, v64, v64);
+  __builtin_HEXAGON_V6_vscattermhq(q64, 0, 0, v64, v64);
   // CHECK: @llvm.hexagon.V6.vscattermhw
   __builtin_HEXAGON_V6_vscattermhw(0, 0, v128, v64);
   // CHECK: @llvm.hexagon.V6.vscattermhw.add
   __builtin_HEXAGON_V6_vscattermhw_add(0, 0, v128, v64);
   // CHECK: @llvm.hexagon.V6.vscattermhwq
-  __builtin_HEXAGON_V6_vscattermhwq(v64, 0, 0, v128, v64);
+  __builtin_HEXAGON_V6_vscattermhwq(q64, 0, 0, v128, v64);
   // CHECK: @llvm.hexagon.V6.vscattermw
   __builtin_HEXAGON_V6_vscattermw(0, 0, v64, v64);
   // CHECK: @llvm.hexagon.V6.vscattermw.add
   __builtin_HEXAGON_V6_vscattermw_add(0, 0, v64, v64);
   // CHECK: @llvm.hexagon.V6.vscattermwq
-  __builtin_HEXAGON_V6_vscattermwq(v64, 0, 0, v64, v64);
+  __builtin_HEXAGON_V6_vscattermwq(q64, 0, 0, v64, v64);
   // CHECK: @llvm.hexagon.V6.vsh
   __builtin_HEXAGON_V6_vsh(v64);
   // CHECK: @llvm.hexagon.V6.vshufeh
@@ -714,9 +715,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vsubb.dv
   __builtin_HEXAGON_V6_vsubb_dv(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubbnq
-  __builtin_HEXAGON_V6_vsubbnq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vsubbnq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubbq
-  __builtin_HEXAGON_V6_vsubbq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vsubbq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubbsat
   __builtin_HEXAGON_V6_vsubbsat(v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubbsat.dv
@@ -728,9 +729,9 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vsubh.dv
   __builtin_HEXAGON_V6_vsubh_dv(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubhnq
-  __builtin_HEXAGON_V6_vsubhnq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vsubhnq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubhq
-  __builtin_HEXAGON_V6_vsubhq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vsubhq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubhsat
   __builtin_HEXAGON_V6_vsubhsat(v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubhsat.dv
@@ -760,15 +761,15 @@ void test() {
   // CHECK: @llvm.hexagon.V6.vsubw.dv
   __builtin_HEXAGON_V6_vsubw_dv(v128, v128);
   // CHECK: @llvm.hexagon.V6.vsubwnq
-  __builtin_HEXAGON_V6_vsubwnq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vsubwnq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubwq
-  __builtin_HEXAGON_V6_vsubwq(v64, v64, v64);
+  __builtin_HEXAGON_V6_vsubwq(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubwsat
   __builtin_HEXAGON_V6_vsubwsat(v64, v64);
   // CHECK: @llvm.hexagon.V6.vsubwsat.dv
   __builtin_HEXAGON_V6_vsubwsat_dv(v128, v128);
   // CHECK: @llvm.hexagon.V6.vswap
-  __builtin_HEXAGON_V6_vswap(v64, v64, v64);
+  __builtin_HEXAGON_V6_vswap(q64, v64, v64);
   // CHECK: @llvm.hexagon.V6.vtmpyb
   __builtin_HEXAGON_V6_vtmpyb(v128, 0);
   // CHECK: @llvm.hexagon.V6.vtmpyb.acc

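(Sketch, not in the patch.) The conditional arithmetic builtins checked above take the predicate as their first operand, followed by the two vector operands. A 64-byte example with illustrative names:

    typedef int HVX_Vector     __attribute__((__vector_size__(64)));
    typedef int HVX_VectorPred __attribute__((__vector_size__(64)));

    /* Byte add controlled by the vector predicate q; q becomes <64 x i1> in the
       generated call to @llvm.hexagon.V6.vaddbq. */
    HVX_Vector add_bytes_if(HVX_VectorPred q, HVX_Vector a, HVX_Vector b) {
      return __builtin_HEXAGON_V6_vaddbq(q, a, b);
    }
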
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td
index f82cac156eca..3e0e8fae7b93 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -258,44 +258,22 @@ Hexagon_v64i32_v64i32v32i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
 // Masked vector stores
 //
 
-//
-// Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
-// tag: V6_vS32b_qpred_ai
-class Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
-                          [], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
-                          [IntrArgMemOnly]>;
-
-//
-// Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
-// tag: V6_vS32b_qpred_ai_128B
-class Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
-                          [], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
-                          [IntrArgMemOnly]>;
-
-def int_hexagon_V6_vmaskedstoreq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">;
-
-def int_hexagon_V6_vmaskedstorenq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorenq">;
-
-def int_hexagon_V6_vmaskedstorentq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentq">;
-
-def int_hexagon_V6_vmaskedstorentnq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentnq">;
-
-def int_hexagon_V6_vmaskedstoreq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstoreq_128B">;
-
-def int_hexagon_V6_vmaskedstorenq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorenq_128B">;
-
-def int_hexagon_V6_vmaskedstorentq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentq_128B">;
-
-def int_hexagon_V6_vmaskedstorentnq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentnq_128B">;
+class Hexagon_custom_vms_Intrinsic
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_v64i1_ty,llvm_ptr_ty,llvm_v16i32_ty], [IntrWriteMem]>;
+
+class Hexagon_custom_vms_Intrinsic_128B
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_v128i1_ty,llvm_ptr_ty,llvm_v32i32_ty], [IntrWriteMem]>;
+
+def int_hexagon_V6_vmaskedstoreq: Hexagon_custom_vms_Intrinsic;
+def int_hexagon_V6_vmaskedstorenq: Hexagon_custom_vms_Intrinsic;
+def int_hexagon_V6_vmaskedstorentq: Hexagon_custom_vms_Intrinsic;
+def int_hexagon_V6_vmaskedstorentnq: Hexagon_custom_vms_Intrinsic;
+
+def int_hexagon_V6_vmaskedstoreq_128B: Hexagon_custom_vms_Intrinsic_128B;
+def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B;
+def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B;
+def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B;
 
 include "llvm/IR/IntrinsicsHexagonDep.td"

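(Sketch, not in the patch.) The masked-store intrinsics redefined above are reached from C through the matching __builtin_HEXAGON_V6_vmaskedstore* builtins. Assuming the usual Q / not-Q masking semantics, a void* address parameter, and illustrative names, a 64-byte merge looks roughly like this:

    typedef int HVX_Vector     __attribute__((__vector_size__(64)));
    typedef int HVX_VectorPred __attribute__((__vector_size__(64)));

    /* Write a where the predicate selects and b elsewhere; both calls take a
       <64 x i1> predicate operand in the generated IR. */
    void merge_store(HVX_VectorPred q, void *dst, HVX_Vector a, HVX_Vector b) {
      __builtin_HEXAGON_V6_vmaskedstoreq(q, dst, a);
      __builtin_HEXAGON_V6_vmaskedstorenq(q, dst, b);
    }
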
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
index e81ac9ba8519..67a06f5c06f4 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
@@ -548,17 +548,17 @@ class Hexagon_v64i32_v64i32v32i32_Intrinsic<string GCCIntSuffix,
        intr_properties>;
 
 // tag : V6_vS32b_qpred_ai
-class Hexagon__v512i1ptrv16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v64i1ptrv16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_v64i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vS32b_qpred_ai
-class Hexagon__v1024i1ptrv32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_v128i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_valignb
@@ -660,31 +660,31 @@ class Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<string GCCIntSuffix,
        intr_properties>;
 
 // tag : V6_vaddcarrysat
-class Hexagon_v16i32_v16i32v16i32v512i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v16i32v16i32v64i1_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v64i1_ty],
        intr_properties>;
 
 // tag : V6_vaddcarrysat
-class Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v32i32v32i32v128i1_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty],
        intr_properties>;
 
 // tag : V6_vaddcarry
-class Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic<
+class Hexagon_custom_v16i32v64i1_v16i32v16i32v64i1_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
   : Hexagon_NonGCC_Intrinsic<
-       [llvm_v16i32_ty,llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
+       [llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v64i1_ty],
        intr_properties>;
 
 // tag : V6_vaddcarry
-class Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B<
+class Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
   : Hexagon_NonGCC_Intrinsic<
-       [llvm_v32i32_ty,llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
+       [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty],
        intr_properties>;
 
 // tag : V6_vaddubh
@@ -702,17 +702,17 @@ class Hexagon_v16i32__Intrinsic<string GCCIntSuffix,
        intr_properties>;
 
 // tag : V6_vaddbq
-class Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v16i32_ty], [llvm_v64i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vaddbq
-class Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v32i32_ty], [llvm_v128i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_vabsb
@@ -751,157 +751,157 @@ class Hexagon_v64i32_v64i32v32i32i32_Intrinsic<string GCCIntSuffix,
        intr_properties>;
 
 // tag : V6_vandqrt
-class Hexagon_v16i32_v512i1i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v64i1i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v16i32_ty], [llvm_v64i1_ty,llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_vandqrt
-class Hexagon_v32i32_v1024i1i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v32i32_ty], [llvm_v128i1_ty,llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_vandqrt_acc
-class Hexagon_v16i32_v16i32v512i1i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v64i1_ty,llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_vandqrt_acc
-class Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v128i1_ty,llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_vandvrt
-class Hexagon_v512i1_v16i32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v16i32i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v64i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_vandvrt
-class Hexagon_v1024i1_v32i32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v32i32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v128i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_vandvrt_acc
-class Hexagon_v512i1_v512i1v16i32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v64i1v16i32i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v64i1_ty], [llvm_v64i1_ty,llvm_v16i32_ty,llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_vandvrt_acc
-class Hexagon_v1024i1_v1024i1v32i32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v128i1v32i32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v128i1_ty], [llvm_v128i1_ty,llvm_v32i32_ty,llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_vandvqv
-class Hexagon_v16i32_v512i1v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v64i1v16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v16i32_ty], [llvm_v64i1_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vandvqv
-class Hexagon_v32i32_v1024i1v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v32i32_ty], [llvm_v128i1_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_vgtw
-class Hexagon_v512i1_v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v16i32v16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vgtw
-class Hexagon_v1024i1_v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_vgtw_and
-class Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v64i1_ty], [llvm_v64i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vgtw_and
-class Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v128i1_ty], [llvm_v128i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_pred_scalar2
-class Hexagon_v512i1_i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v512i1_ty], [llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v64i1_ty], [llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_pred_scalar2
-class Hexagon_v1024i1_i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v1024i1_ty], [llvm_i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v128i1_ty], [llvm_i32_ty],
        intr_properties>;
 
 // tag : V6_shuffeqw
-class Hexagon_v512i1_v512i1v512i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v64i1v64i1_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v64i1_ty], [llvm_v64i1_ty,llvm_v64i1_ty],
        intr_properties>;
 
 // tag : V6_shuffeqw
-class Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v128i1_ty], [llvm_v128i1_ty,llvm_v128i1_ty],
        intr_properties>;
 
 // tag : V6_pred_not
-class Hexagon_v512i1_v512i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v64i1_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v512i1_ty], [llvm_v512i1_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v64i1_ty], [llvm_v64i1_ty],
        intr_properties>;
 
 // tag : V6_pred_not
-class Hexagon_v1024i1_v1024i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v128i1_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v1024i1_ty], [llvm_v1024i1_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v128i1_ty], [llvm_v128i1_ty],
        intr_properties>;
 
 // tag : V6_vswap
-class Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v64i1v16i32v16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v32i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v32i32_ty], [llvm_v64i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vswap
-class Hexagon_v64i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i32_v128i1v32i32v32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v64i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v64i32_ty], [llvm_v128i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_vshuffvdd
@@ -982,31 +982,31 @@ class Hexagon__ptri32i32v64i32_Intrinsic<string GCCIntSuffix,
        intr_properties>;
 
 // tag : V6_vgathermwq
-class Hexagon__ptrv512i1i32i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_ptr_ty,llvm_v64i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vgathermwq
-class Hexagon__ptrv1024i1i32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_ptr_ty,llvm_v128i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_vgathermhwq
-class Hexagon__ptrv512i1i32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__ptrv64i1i32i32v32i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_ptr_ty,llvm_v64i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_vgathermhwq
-class Hexagon__ptrv1024i1i32i32v64i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__ptrv128i1i32i32v64i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v64i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_ptr_ty,llvm_v128i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v64i32_ty],
        intr_properties>;
 
 // tag : V6_vscattermw
@@ -1024,17 +1024,17 @@ class Hexagon__i32i32v32i32v32i32_Intrinsic<string GCCIntSuffix,
        intr_properties>;
 
 // tag : V6_vscattermwq
-class Hexagon__v512i1i32i32v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_v512i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_v64i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vscattermwq
-class Hexagon__v1024i1i32i32v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_v1024i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_v128i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_vscattermhw
@@ -1052,31 +1052,31 @@ class Hexagon__i32i32v64i32v32i32_Intrinsic<string GCCIntSuffix,
        intr_properties>;
 
 // tag : V6_vscattermhwq
-class Hexagon__v512i1i32i32v32i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v64i1i32i32v32i32v16i32_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_v512i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty,llvm_v16i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_v64i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty,llvm_v16i32_ty],
        intr_properties>;
 
 // tag : V6_vscattermhwq
-class Hexagon__v1024i1i32i32v64i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v128i1i32i32v64i32v32i32_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [], [llvm_v1024i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v64i32_ty,llvm_v32i32_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [], [llvm_v128i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v64i32_ty,llvm_v32i32_ty],
        intr_properties>;
 
 // tag : V6_vprefixqb
-class Hexagon_v16i32_v512i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v64i1_Intrinsic<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v16i32_ty], [llvm_v512i1_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v16i32_ty], [llvm_v64i1_ty],
        intr_properties>;
 
 // tag : V6_vprefixqb
-class Hexagon_v32i32_v1024i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v128i1_Intrinsic_128B<
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_Intrinsic<GCCIntSuffix,
-       [llvm_v32i32_ty], [llvm_v1024i1_ty],
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v32i32_ty], [llvm_v128i1_ty],
        intr_properties>;
 
 // V5 Scalar Instructions.
@@ -3779,28 +3779,28 @@ Hexagon_double_doubledoubledouble_Intrinsic<"HEXAGON_F2_dfmpyhh", [IntrNoMem, Th
 // V60 HVX Instructions.
 
 def int_hexagon_V6_vS32b_qpred_ai :
-Hexagon__v512i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai", [IntrWriteMem]>;
+Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>;
 
 def int_hexagon_V6_vS32b_qpred_ai_128B :
-Hexagon__v1024i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>;
 
 def int_hexagon_V6_vS32b_nqpred_ai :
-Hexagon__v512i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai", [IntrWriteMem]>;
+Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>;
 
 def int_hexagon_V6_vS32b_nqpred_ai_128B :
-Hexagon__v1024i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>;
 
 def int_hexagon_V6_vS32b_nt_qpred_ai :
-Hexagon__v512i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai", [IntrWriteMem]>;
+Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>;
 
 def int_hexagon_V6_vS32b_nt_qpred_ai_128B :
-Hexagon__v1024i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>;
 
 def int_hexagon_V6_vS32b_nt_nqpred_ai :
-Hexagon__v512i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai", [IntrWriteMem]>;
+Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>;
 
 def int_hexagon_V6_vS32b_nt_nqpred_ai_128B :
-Hexagon__v1024i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>;
 
 def int_hexagon_V6_valignb :
 Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignb">;
@@ -4643,76 +4643,76 @@ def int_hexagon_V6_vd0_128B :
 Hexagon_v32i32__Intrinsic<"HEXAGON_V6_vd0_128B">;
 
 def int_hexagon_V6_vaddbq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vaddbq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vsubbq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vsubbq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vaddbnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vaddbnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vsubbnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vsubbnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vaddhq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vaddhq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vsubhq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vsubhq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vaddhnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vaddhnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vsubhnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vsubhnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vaddwq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vaddwq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vsubwq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vsubwq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vaddwnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vaddwnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vsubwnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vsubwnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vabsh :
 Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsh">;
@@ -5045,298 +5045,298 @@ def int_hexagon_V6_vnot_128B :
 Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vnot_128B">;
 
 def int_hexagon_V6_vandqrt :
-Hexagon_v16i32_v512i1i32_Intrinsic<"HEXAGON_V6_vandqrt">;
+Hexagon_custom_v16i32_v64i1i32_Intrinsic;
 
 def int_hexagon_V6_vandqrt_128B :
-Hexagon_v32i32_v1024i1i32_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B;
 
 def int_hexagon_V6_vandqrt_acc :
-Hexagon_v16i32_v16i32v512i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
+Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic;
 
 def int_hexagon_V6_vandqrt_acc_128B :
-Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">;
+Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B;
 
 def int_hexagon_V6_vandvrt :
-Hexagon_v512i1_v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt">;
+Hexagon_custom_v64i1_v16i32i32_Intrinsic;
 
 def int_hexagon_V6_vandvrt_128B :
-Hexagon_v1024i1_v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_128B">;
+Hexagon_custom_v128i1_v32i32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vandvrt_acc :
-Hexagon_v512i1_v512i1v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc">;
+Hexagon_custom_v64i1_v64i1v16i32i32_Intrinsic;
 
 def int_hexagon_V6_vandvrt_acc_128B :
-Hexagon_v1024i1_v1024i1v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">;
+Hexagon_custom_v128i1_v128i1v32i32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtw :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtw_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtw_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtw_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtw_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtw_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtw_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtw_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqw :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqw_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqw_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqw_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqw_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqw_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqw_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqw_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgth :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgth_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgth_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgth_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgth_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgth_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgth_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgth_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqh :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqh_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqh_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqh_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqh_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqh_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqh_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqh_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtb :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtb_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtb_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtb_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtb_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtb_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtb_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtb_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqb :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqb_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqb_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqb_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqb_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqb_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_veqb_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_veqb_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtuw :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtuw_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtuw_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtuw_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtuw_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtuw_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtuw_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtuw_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtuh :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtuh_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtuh_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtuh_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtuh_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtuh_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtuh_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtuh_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtub :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtub_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtub_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtub_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtub_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtub_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vgtub_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vgtub_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_pred_or :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_or">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
 
 def int_hexagon_V6_pred_or_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_or_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_pred_and :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_and">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
 
 def int_hexagon_V6_pred_and_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_and_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_pred_not :
-Hexagon_v512i1_v512i1_Intrinsic<"HEXAGON_V6_pred_not">;
+Hexagon_custom_v64i1_v64i1_Intrinsic;
 
 def int_hexagon_V6_pred_not_128B :
-Hexagon_v1024i1_v1024i1_Intrinsic<"HEXAGON_V6_pred_not_128B">;
+Hexagon_custom_v128i1_v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_pred_xor :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_xor">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
 
 def int_hexagon_V6_pred_xor_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_xor_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_pred_and_n :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_and_n">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
 
 def int_hexagon_V6_pred_and_n_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_and_n_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_pred_or_n :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_or_n">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
 
 def int_hexagon_V6_pred_or_n_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_or_n_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_pred_scalar2 :
-Hexagon_v512i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2">;
+Hexagon_custom_v64i1_i32_Intrinsic;
 
 def int_hexagon_V6_pred_scalar2_128B :
-Hexagon_v1024i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">;
+Hexagon_custom_v128i1_i32_Intrinsic_128B;
 
 def int_hexagon_V6_vmux :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vmux">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vmux_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vmux_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vswap :
-Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vswap">;
+Hexagon_custom_v32i32_v64i1v16i32v16i32_Intrinsic;
 
 def int_hexagon_V6_vswap_128B :
-Hexagon_v64i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vswap_128B">;
+Hexagon_custom_v64i32_v128i1v32i32v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vmaxub :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxub">;
@@ -5677,16 +5677,16 @@ def int_hexagon_V6_vsubbsat_dv_128B :
 Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubbsat_dv_128B">;
 
 def int_hexagon_V6_vaddcarry :
-Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic;
+Hexagon_custom_v16i32v64i1_v16i32v16i32v64i1_Intrinsic;
 
 def int_hexagon_V6_vaddcarry_128B :
-Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B;
+Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_vsubcarry :
-Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic;
+Hexagon_custom_v16i32v64i1_v16i32v16i32v64i1_Intrinsic;
 
 def int_hexagon_V6_vsubcarry_128B :
-Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B;
+Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_vaddububb_sat :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddububb_sat">;
@@ -5755,46 +5755,46 @@ def int_hexagon_V6_vmpyiwub_acc_128B :
 Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub_acc_128B">;
 
 def int_hexagon_V6_vandnqrt :
-Hexagon_v16i32_v512i1i32_Intrinsic<"HEXAGON_V6_vandnqrt">;
+Hexagon_custom_v16i32_v64i1i32_Intrinsic;
 
 def int_hexagon_V6_vandnqrt_128B :
-Hexagon_v32i32_v1024i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_128B">;
+Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B;
 
 def int_hexagon_V6_vandnqrt_acc :
-Hexagon_v16i32_v16i32v512i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc">;
+Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic;
 
 def int_hexagon_V6_vandnqrt_acc_128B :
-Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc_128B">;
+Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B;
 
 def int_hexagon_V6_vandvqv :
-Hexagon_v16i32_v512i1v16i32_Intrinsic<"HEXAGON_V6_vandvqv">;
+Hexagon_custom_v16i32_v64i1v16i32_Intrinsic;
 
 def int_hexagon_V6_vandvqv_128B :
-Hexagon_v32i32_v1024i1v32i32_Intrinsic<"HEXAGON_V6_vandvqv_128B">;
+Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_vandvnqv :
-Hexagon_v16i32_v512i1v16i32_Intrinsic<"HEXAGON_V6_vandvnqv">;
+Hexagon_custom_v16i32_v64i1v16i32_Intrinsic;
 
 def int_hexagon_V6_vandvnqv_128B :
-Hexagon_v32i32_v1024i1v32i32_Intrinsic<"HEXAGON_V6_vandvnqv_128B">;
+Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B;
 
 def int_hexagon_V6_pred_scalar2v2 :
-Hexagon_v512i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2">;
+Hexagon_custom_v64i1_i32_Intrinsic;
 
 def int_hexagon_V6_pred_scalar2v2_128B :
-Hexagon_v1024i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2_128B">;
+Hexagon_custom_v128i1_i32_Intrinsic_128B;
 
 def int_hexagon_V6_shuffeqw :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_shuffeqw">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
 
 def int_hexagon_V6_shuffeqw_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_shuffeqw_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_shuffeqh :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_shuffeqh">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
 
 def int_hexagon_V6_shuffeqh_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_shuffeqh_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_vmaxb :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxb">;
@@ -6027,22 +6027,22 @@ def int_hexagon_V6_vgathermhw_128B :
 Hexagon__ptri32i32v64i32_Intrinsic<"HEXAGON_V6_vgathermhw_128B", [IntrArgMemOnly]>;
 
 def int_hexagon_V6_vgathermwq :
-Hexagon__ptrv512i1i32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermwq", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<[IntrArgMemOnly]>;
 
 def int_hexagon_V6_vgathermwq_128B :
-Hexagon__ptrv1024i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermwq_128B", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<[IntrArgMemOnly]>;
 
 def int_hexagon_V6_vgathermhq :
-Hexagon__ptrv512i1i32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermhq", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<[IntrArgMemOnly]>;
 
 def int_hexagon_V6_vgathermhq_128B :
-Hexagon__ptrv1024i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhq_128B", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<[IntrArgMemOnly]>;
 
 def int_hexagon_V6_vgathermhwq :
-Hexagon__ptrv512i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhwq", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv64i1i32i32v32i32_Intrinsic<[IntrArgMemOnly]>;
 
 def int_hexagon_V6_vgathermhwq_128B :
-Hexagon__ptrv1024i1i32i32v64i32_Intrinsic<"HEXAGON_V6_vgathermhwq_128B", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv128i1i32i32v64i32_Intrinsic_128B<[IntrArgMemOnly]>;
 
 def int_hexagon_V6_vscattermw :
 Hexagon__i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermw", [IntrWriteMem]>;
@@ -6069,16 +6069,16 @@ def int_hexagon_V6_vscattermh_add_128B :
 Hexagon__i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermh_add_128B", [IntrWriteMem]>;
 
 def int_hexagon_V6_vscattermwq :
-Hexagon__v512i1i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermwq", [IntrWriteMem]>;
+Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<[IntrWriteMem]>;
 
 def int_hexagon_V6_vscattermwq_128B :
-Hexagon__v1024i1i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermwq_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<[IntrWriteMem]>;
 
 def int_hexagon_V6_vscattermhq :
-Hexagon__v512i1i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhq", [IntrWriteMem]>;
+Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<[IntrWriteMem]>;
 
 def int_hexagon_V6_vscattermhq_128B :
-Hexagon__v1024i1i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhq_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<[IntrWriteMem]>;
 
 def int_hexagon_V6_vscattermhw :
 Hexagon__i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhw", [IntrWriteMem]>;
@@ -6087,10 +6087,10 @@ def int_hexagon_V6_vscattermhw_128B :
 Hexagon__i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhw_128B", [IntrWriteMem]>;
 
 def int_hexagon_V6_vscattermhwq :
-Hexagon__v512i1i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhwq", [IntrWriteMem]>;
+Hexagon_custom__v64i1i32i32v32i32v16i32_Intrinsic<[IntrWriteMem]>;
 
 def int_hexagon_V6_vscattermhwq_128B :
-Hexagon__v1024i1i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhwq_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1i32i32v64i32v32i32_Intrinsic_128B<[IntrWriteMem]>;
 
 def int_hexagon_V6_vscattermhw_add :
 Hexagon__i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhw_add", [IntrWriteMem]>;
@@ -6099,22 +6099,22 @@ def int_hexagon_V6_vscattermhw_add_128B :
 Hexagon__i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B", [IntrWriteMem]>;
 
 def int_hexagon_V6_vprefixqb :
-Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqb">;
+Hexagon_custom_v16i32_v64i1_Intrinsic;
 
 def int_hexagon_V6_vprefixqb_128B :
-Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqb_128B">;
+Hexagon_custom_v32i32_v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_vprefixqh :
-Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqh">;
+Hexagon_custom_v16i32_v64i1_Intrinsic;
 
 def int_hexagon_V6_vprefixqh_128B :
-Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqh_128B">;
+Hexagon_custom_v32i32_v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_vprefixqw :
-Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqw">;
+Hexagon_custom_v16i32_v64i1_Intrinsic;
 
 def int_hexagon_V6_vprefixqw_128B :
-Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqw_128B">;
+Hexagon_custom_v32i32_v128i1_Intrinsic_128B;
 
 // V66 HVX Instructions.
 
@@ -6131,10 +6131,10 @@ def int_hexagon_V6_vasr_into_128B :
 Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vasr_into_128B">;
 
 def int_hexagon_V6_vaddcarrysat :
-Hexagon_v16i32_v16i32v16i32v512i1_Intrinsic<"HEXAGON_V6_vaddcarrysat">;
+Hexagon_custom_v16i32_v16i32v16i32v64i1_Intrinsic;
 
 def int_hexagon_V6_vaddcarrysat_128B :
-Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<"HEXAGON_V6_vaddcarrysat_128B">;
+Hexagon_custom_v32i32_v32i32v32i32v128i1_Intrinsic_128B;
 
 def int_hexagon_V6_vsatdw :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatdw">;
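
For reference, here is what the retyped 64-byte-mode intrinsics look like from
IR once this change is in (a minimal sketch, not part of the patch; the function
name "select_greater" and the attribute set are invented for illustration, while
the intrinsic signatures follow the definitions above):

  ; Word compare producing an HVX predicate: one i1 per vector byte.
  declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>)
  ; Per-byte select controlled by that predicate.
  declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>)

  define <16 x i32> @select_greater(<16 x i32> %a, <16 x i32> %b) #0 {
    %q = call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
    %m = call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %q, <16 x i32> %a, <16 x i32> %b)
    ret <16 x i32> %m
  }

  attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }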

diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 49bc133545ee..c0f92042e5da 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1199,7 +1199,7 @@ OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
                          ResultStack &Results) {
   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
   MVT ByteTy = getSingleVT(MVT::i8);
-  MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX
+  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
   const SDLoc &dl(Results.InpNode);
   SDValue B = getVectorConstant(Bytes, dl);
   Results.push(Hexagon::V6_vd0, ByteTy, {});
@@ -2203,28 +2203,28 @@ void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
   case Intrinsic::hexagon_V6_vaddcarry: {
     std::array<SDValue, 3> Ops = {
         {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
-    SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1);
+    SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
     Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
     break;
   }
   case Intrinsic::hexagon_V6_vaddcarry_128B: {
     std::array<SDValue, 3> Ops = {
         {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
-    SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1);
+    SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
     Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
     break;
   }
   case Intrinsic::hexagon_V6_vsubcarry: {
     std::array<SDValue, 3> Ops = {
         {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
-    SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1);
+    SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
     Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
     break;
   }
   case Intrinsic::hexagon_V6_vsubcarry_128B: {
     std::array<SDValue, 3> Ops = {
         {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
-    SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1);
+    SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
     Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
     break;
   }
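
For the dual-output carry intrinsics selected above, the predicate now appears
as the second member of the aggregate result at the IR level. A minimal sketch
(illustrative only; the wrapper "add_with_carry" and its attributes are made up,
and it assumes the usual aggregate-return form for multi-result intrinsics):

  declare { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32>, <16 x i32>, <64 x i1>)

  define <16 x i32> @add_with_carry(<16 x i32> %a, <16 x i32> %b, <64 x i1> %cin) #0 {
    ; Result 0 is the word-wise sum, result 1 the carry-out predicate.
    %r = call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a, <16 x i32> %b, <64 x i1> %cin)
    %sum = extractvalue { <16 x i32>, <64 x i1> } %r, 0
    ret <16 x i32> %sum
  }

  attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }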

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 0a23b50986fa..284c6b204c3a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1080,42 +1080,24 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
-static Constant *convert_i1_to_i8(const Constant *ConstVal) {
-  SmallVector<Constant *, 128> NewConst;
-  const ConstantVector *CV = dyn_cast<ConstantVector>(ConstVal);
-  if (!CV)
-    return nullptr;
-
-  LLVMContext &Ctx = ConstVal->getContext();
-  IRBuilder<> IRB(Ctx);
-  unsigned NumVectorElements = CV->getNumOperands();
-  assert(isPowerOf2_32(NumVectorElements) &&
-         "conversion only supported for pow2 VectorSize!");
-
-  for (unsigned i = 0; i < NumVectorElements / 8; ++i) {
-    uint8_t x = 0;
-    for (unsigned j = 0; j < 8; ++j) {
-      uint8_t y = CV->getOperand(i * 8 + j)->getUniqueInteger().getZExtValue();
-      x |= y << (7 - j);
-    }
-    assert((x == 0 || x == 255) && "Either all 0's or all 1's expected!");
-    NewConst.push_back(IRB.getInt8(x));
-  }
-  return ConstantVector::get(NewConst);
-}
-
 SDValue
 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
   EVT ValTy = Op.getValueType();
   ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
   Constant *CVal = nullptr;
   bool isVTi1Type = false;
-  if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
-    Type *CValTy = ConstVal->getType();
-    if (CValTy->isVectorTy() &&
-        CValTy->getVectorElementType()->isIntegerTy(1)) {
-      CVal = convert_i1_to_i8(ConstVal);
-      isVTi1Type = (CVal != nullptr);
+  if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
+    if (CV->getType()->getVectorElementType()->isIntegerTy(1)) {
+      IRBuilder<> IRB(CV->getContext());
+      SmallVector<Constant*, 128> NewConst;
+      unsigned VecLen = CV->getNumOperands();
+      assert(isPowerOf2_32(VecLen) &&
+             "conversion only supported for pow2 VectorSize");
+      for (unsigned i = 0; i < VecLen; ++i)
+        NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
+
+      CVal = ConstantVector::get(NewConst);
+      isVTi1Type = true;
     }
   }
   unsigned Align = CPN->getAlignment();
@@ -3225,8 +3207,8 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
       switch (VT.getSizeInBits()) {
       default:
         return {0u, nullptr};
-      case 512:
-      case 1024:
+      case 64:
+      case 128:
         return {0u, &Hexagon::HvxQRRegClass};
       }
       break;

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 204950f9010e..b18afb209240 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -39,7 +39,6 @@ HexagonTargetLowering::initializeHVXLowering() {
     addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
     addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
     addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
-    addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass);
   } else if (Subtarget.useHVX128BOps()) {
     addRegisterClass(MVT::v128i8,  &Hexagon::HvxVRRegClass);
     addRegisterClass(MVT::v64i16,  &Hexagon::HvxVRRegClass);
@@ -50,7 +49,6 @@ HexagonTargetLowering::initializeHVXLowering() {
     addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
     addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
     addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
-    addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass);
   }
 
   // Set up operation actions.

diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 8ae55b207188..4f0e7e8ed2cc 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -277,76 +277,6 @@ def : Pat <(v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
            Requires<[UseHVX]>;
 }
 
-def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
-           (v512i1 (V6_vandvrt (v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
-           (v512i1 (V6_vandvrt (v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (bitconvert (v64i8  HvxVR:$src1))),
-           (v512i1 (V6_vandvrt (v64i8  HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
-           (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
-           (v32i16 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v64i8  (bitconvert (v512i1 HvxQR:$src1))),
-           (v64i8  (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
-           (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
-           (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
-           (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
-           (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
-           (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
-           (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-let AddedComplexity = 140 in {
-def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
-           (V6_vS32b_ai IntRegs:$addr, 0,
-           (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
-           (v512i1 (V6_vandvrt
-           (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
-           (V6_vS32b_ai IntRegs:$addr, 0,
-           (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
-           Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
-           (v1024i1 (V6_vandvrt
-           (v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
-           Requires<[UseHVX]>;
-}
-
 def: Pat<(v64i16 (trunc v64i32:$Vdd)),
          (v64i16 (V6_vpackwh_sat
                  (v32i32 (V6_hi HvxWR:$Vdd)),

diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index a60c80beb5d6..1245ee7974b5 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -25,59 +25,59 @@ def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
             (v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_hi)) >;
 }
 
-def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
-           (v512i1 (V6_vandvrt(v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v16i32 HvxVR:$src1))),
+           (v64i1 (V6_vandvrt(v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
-           (v512i1 (V6_vandvrt(v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v32i16 HvxVR:$src1))),
+           (v64i1 (V6_vandvrt(v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v512i1 (bitconvert (v64i8  HvxVR:$src1))),
-           (v512i1 (V6_vandvrt(v64i8  HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v64i8  HvxVR:$src1))),
+           (v64i1 (V6_vandvrt(v64i8  HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
-           (v16i32 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v16i32 (bitconvert (v64i1 HvxQR:$src1))),
+           (v16i32 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
-           (v32i16 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v32i16 (bitconvert (v64i1 HvxQR:$src1))),
+           (v32i16 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v64i8  (bitconvert (v512i1 HvxQR:$src1))),
-           (v64i8  (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i8  (bitconvert (v64i1 HvxQR:$src1))),
+           (v64i8  (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
-           (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v32i32 HvxVR:$src1))),
+           (v128i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
-           (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v64i16 HvxVR:$src1))),
+           (v128i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v1024i1 (bitconvert (v128i8  HvxVR:$src1))),
-           (v1024i1 (V6_vandvrt (v128i8  HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v128i8  HvxVR:$src1))),
+           (v128i1 (V6_vandvrt (v128i8  HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
-           (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v32i32 (bitconvert (v128i1 HvxQR:$src1))),
+           (v32i32 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
-           (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i16 (bitconvert (v128i1 HvxQR:$src1))),
+           (v64i16 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(v128i8  (bitconvert (v1024i1 HvxQR:$src1))),
-           (v128i8  (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i8  (bitconvert (v128i1 HvxQR:$src1))),
+           (v128i8  (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
 
 let AddedComplexity = 140 in {
-def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+def : Pat <(store (v64i1 HvxQR:$src1), (i32 IntRegs:$addr)),
            (V6_vS32b_ai IntRegs:$addr, 0,
-           (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1),
+           (v16i32 (V6_vandqrt (v64i1 HvxQR:$src1),
                                        (A2_tfrsi 0x01010101))))>;
 
-def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
-           (v512i1 (V6_vandvrt
+def : Pat <(v64i1 (load (i32 IntRegs:$addr))),
+           (v64i1 (V6_vandvrt
            (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>;
 
-def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+def : Pat <(store (v128i1 HvxQR:$src1), (i32 IntRegs:$addr)),
            (V6_vS32b_ai IntRegs:$addr, 0,
-           (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1),
+           (v32i32 (V6_vandqrt (v128i1 HvxQR:$src1),
                                        (A2_tfrsi 0x01010101))))>;
 
-def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
-           (v1024i1 (V6_vandvrt
+def : Pat <(v128i1 (load (i32 IntRegs:$addr))),
+           (v128i1 (V6_vandvrt
            (v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>;
 }
 

diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index ea39dc44d15b..49428db223a1 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -319,7 +319,7 @@ let Namespace = "Hexagon" in {
 // HVX types
 
 def VecI1:   ValueTypeByHwMode<[Hvx64,  Hvx128,  DefaultMode],
-                               [v512i1, v1024i1, v512i1]>;
+                               [v64i1,  v128i1,  v64i1]>;
 def VecI8:   ValueTypeByHwMode<[Hvx64,  Hvx128,  DefaultMode],
                                [v64i8,  v128i8,  v64i8]>;
 def VecI16:  ValueTypeByHwMode<[Hvx64,  Hvx128,  DefaultMode],
@@ -355,10 +355,10 @@ def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024,
     [RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>;
 }
 
-def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512,
+def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 128,
   (add Q0, Q1, Q2, Q3)> {
   let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
-    [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
+    [RegInfo<64,512,512>, RegInfo<128,1024,1024>, RegInfo<64,512,512>]>;
 }
 
 def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048,

diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 2c6d489f53e4..c9f04651cf70 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -286,9 +286,6 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
     ArrayRef<MVT> ElemTypes = getHVXElementTypes();
 
     if (IncludeBool && ElemTy == MVT::i1) {
-      // Special case for the v512i1, etc.
-      if (8*HwLen == NumElems)
-        return true;
       // Boolean HVX vector types are formed from regular HVX vector types
       // by replacing the element type with i1.
       for (MVT T : ElemTypes)

diff --git a/llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll
index 0fc8ba4bf1dc..08dd342d0632 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll
@@ -10,7 +10,7 @@ define <128 x i8> @t00(<128 x i8> %a0, <128 x i8> %a1) #0 {
   ret <128 x i8> %v0
 }
 
-declare <1024 x i1> @llvm.hexagon.vandvrt.128B(<128 x i8>, i32)
+declare <128 x i1> @llvm.hexagon.vandvrt.128B(<128 x i8>, i32)
 
 ; CHECK-LABEL: t01
 ; CHECK: vor(v{{[0-9:]+}},v{{[0-9:]+}})

diff --git a/llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll b/llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll
index e9fd9a0977dc..339cc3887300 100644
--- a/llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll
+++ b/llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll
@@ -3,40 +3,34 @@
 
 define inreg <16 x i32> @f0(i32 %a0, <16 x i32>* nocapture %a1) #0 {
 b0:
-  %v0 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a0)
-  %v1 = tail call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %v0)
+  %v0 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a0)
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %v0)
   %v2 = icmp ult i32 %a0, 48
   br i1 %v2, label %b1, label %b2
 
 b1:                                               ; preds = %b0
   %v3 = add nuw nsw i32 %a0, 16
-  %v4 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v3)
-  %v5 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v4, <512 x i1> %v1)
+  %v4 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v3)
+  %v5 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v4, <64 x i1> %v1)
   br label %b2
 
 b2:                                               ; preds = %b1, %b0
-  %v6 = phi <512 x i1> [ %v5, %b1 ], [ %v1, %b0 ]
-  %v7 = bitcast <512 x i1> %v6 to <16 x i32>
+  %v6 = phi <64 x i1> [ %v5, %b1 ], [ %v1, %b0 ]
+  %v7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v6, i32 -1)
   %v8 = getelementptr inbounds <16 x i32>, <16 x i32>* %a1, i32 1
   %v9 = load <16 x i32>, <16 x i32>* %v8, align 64
   %v10 = getelementptr inbounds <16 x i32>, <16 x i32>* %a1, i32 2
   %v11 = load <16 x i32>, <16 x i32>* %v10, align 64
-  %v12 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v6, <16 x i32> %v9, <16 x i32> %v11)
+  %v12 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v6, <16 x i32> %v9, <16 x i32> %v11)
   store <16 x i32> %v12, <16 x i32>* %a1, align 64
   ret <16 x i32> %v7
 }
 
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #1
-
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
-
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
-
-; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll b/llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll
index 4c266a68c245..9246b026b664 100644
--- a/llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll
+++ b/llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll
@@ -6,12 +6,12 @@ define void @convert_const_i1_to_i8(<32 x i32>* %a0) #0 {
 entry:
   %v0 = load <32 x i32>, <32 x i32>* %a0, align 128
   %v1 = tail call <32 x i32> @llvm.hexagon.V6.vrdelta.128B(<32 x i32> %v0, <32 x i32> undef)
-  %v2 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 
false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 
false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i32> undef, <32 x i32> %v1)
+  %v2 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>, <32 x i32> undef, <32 x i32> %v1)
   store <32 x i32> %v2, <32 x i32>* %a0, align 128
   ret void
 }
 
 declare <32 x i32> @llvm.hexagon.V6.vrdelta.128B(<32 x i32>, <32 x i32>)
-declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>)
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>)
 
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }

diff --git a/llvm/test/CodeGen/Hexagon/early-if-vecpred.ll b/llvm/test/CodeGen/Hexagon/early-if-vecpred.ll
index 05074338cffb..372e96dbff83 100644
--- a/llvm/test/CodeGen/Hexagon/early-if-vecpred.ll
+++ b/llvm/test/CodeGen/Hexagon/early-if-vecpred.ll
@@ -14,22 +14,21 @@ target triple = "hexagon"
 ; CHECK: if (q{{[0-3]}}) vmem
 define void @fred(i32 %a0) #0 {
 b1:
-  %v2 = tail call <1024 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32 %a0) #2
+  %v2 = tail call <128 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32 %a0) #2
   br i1 undef, label %b3, label %b5
 
 b3:                                               ; preds = %b1
-  %v4 = tail call <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1> %v2) #2
+  %v4 = tail call <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1> %v2) #2
   br label %b5
 
 b5:                                               ; preds = %b3, %b1
-  %v6 = phi <1024 x i1> [ %v4, %b3 ], [ %v2, %b1 ]
-  %v7 = bitcast <1024 x i1> %v6 to <32 x i32>
-  tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<32 x i32> %v7, <32 x i32>* undef, <32 x i32> undef) #2
+  %v6 = phi <128 x i1> [ %v4, %b3 ], [ %v2, %b1 ]
+  tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<128 x i1> %v6, <32 x i32>* undef, <32 x i32> undef) #2
   ret void
 }
 
-declare <1024 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32) #1
-declare <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1>) #1
+declare <128 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32) #1
+declare <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1>) #1
 
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll b/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
index 4c93ab201e3b..7cc92736fda4 100644
--- a/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
+++ b/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
@@ -47,61 +47,61 @@ for.body:
   %18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128
   %arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14
   %19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128
-  %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
-  %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2)
-  %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11)
-  %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3)
-  %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef)
-  %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
-  %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7)
-  %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15)
-  %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8)
-  %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16)
-  %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9)
-  %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17)
-  %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
-  %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4)
-  %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13)
-  %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5)
-  %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef)
-  %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6)
-  %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14)
-  %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
-  %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
-  %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
-  %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
-  %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
-  %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
-  %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
-  %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21)
-  %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26)
-  %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23)
-  %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28)
-  %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24)
-  %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30)
-  %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
-  %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22)
-  %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27)
-  %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef)
-  %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31)
-  %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
-  %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33)
-  %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39)
-  %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35)
-  %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37)
-  %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
-  %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36)
-  %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44)
-  %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
-  %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46)
-  %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48)
-  %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50)
-  %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
-  %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
-  %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
-  %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55)
-  %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
-  %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64)
+  %20 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
+  %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %11, <32 x i32> %2)
+  %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %2, <32 x i32> %11)
+  %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> undef, <32 x i32> %3)
+  %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %12, <32 x i32> undef)
+  %25 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
+  %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %15, <32 x i32> %7)
+  %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %7, <32 x i32> %15)
+  %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %16, <32 x i32> %8)
+  %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %8, <32 x i32> %16)
+  %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %17, <32 x i32> %9)
+  %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %9, <32 x i32> %17)
+  %32 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
+  %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %13, <32 x i32> %4)
+  %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %4, <32 x i32> %13)
+  %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> undef, <32 x i32> %5)
+  %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %5, <32 x i32> undef)
+  %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %14, <32 x i32> %6)
+  %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %6, <32 x i32> %14)
+  %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
+  %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
+  %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
+  %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
+  %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
+  %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
+  %45 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
+  %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %26, <32 x i32> %21)
+  %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %21, <32 x i32> %26)
+  %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %28, <32 x i32> %23)
+  %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %23, <32 x i32> %28)
+  %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %30, <32 x i32> %24)
+  %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %24, <32 x i32> %30)
+  %52 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
+  %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %27, <32 x i32> %22)
+  %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %22, <32 x i32> %27)
+  %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %29, <32 x i32> undef)
+  %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> undef, <32 x i32> %31)
+  %57 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
+  %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %39, <32 x i32> %33)
+  %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %33, <32 x i32> %39)
+  %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %41, <32 x i32> %35)
+  %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %43, <32 x i32> %37)
+  %62 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
+  %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %62, <32 x i32> %42, <32 x i32> %36)
+  %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %62, <32 x i32> %38, <32 x i32> %44)
+  %65 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
+  %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %58, <32 x i32> %46)
+  %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %60, <32 x i32> %48)
+  %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %61, <32 x i32> %50)
+  %69 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
+  %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
+  %71 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
+  %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %71, <32 x i32> %63, <32 x i32> %55)
+  %73 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
+  %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %73, <32 x i32> %56, <32 x i32> %64)
   %75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67)
   %76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef)
   %77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72)
@@ -129,9 +129,9 @@ for.end:
   ret void
 }
 
-declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
 
-declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
 
 declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1
 

diff --git a/llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll b/llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
index c7d348ad3d38..e54ca1ea3435 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
@@ -7,51 +7,52 @@
 
 define void @f0(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
 b0:
-  %v0 = bitcast <32 x i32> %a0 to <1024 x i1>
-  tail call void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
+  %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+  tail call void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
   ret void
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
 
 ; CHECK-LABEL: f1:
 ; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0) = v{{[0-9]+}}
 
 define void @f1(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
 b0:
-  %v0 = bitcast <32 x i32> %a0 to <1024 x i1>
-  tail call void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
+  %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+  tail call void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
   ret void
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
 
 ; CHECK-LABEL: f2:
 ; CHECK: if (q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
 
 define void @f2(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
 b0:
-  %v0 = bitcast <32 x i32> %a0 to <1024 x i1>
-  tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
+  %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+  tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
   ret void
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
 
 ; CHECK-LABEL: f3:
 ; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
 
 define void @f3(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
 b0:
-  %v0 = bitcast <32 x i32> %a0 to <1024 x i1>
-  tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
+  %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+  tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
   ret void
 }
 
-; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #1
 
 attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/hvx-byte-store.ll b/llvm/test/CodeGen/Hexagon/hvx-byte-store.ll
index 27c509e49190..78c5a1161ca8 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-byte-store.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-byte-store.ll
@@ -7,51 +7,52 @@
 
 define void @f0(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
 b0:
-  %v0 = bitcast <16 x i32> %a0 to <512 x i1>
-  tail call void @llvm.hexagon.V6.vS32b.qpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
+  %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
+  tail call void @llvm.hexagon.V6.vS32b.qpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
   ret void
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.qpred.ai(<512 x i1>, i8*, <16 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.qpred.ai(<64 x i1>, i8*, <16 x i32>) #0
 
 ; CHECK-LABEL: f1:
 ; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0) = v{{[0-9]+}}
 
 define void @f1(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
 b0:
-  %v0 = bitcast <16 x i32> %a0 to <512 x i1>
-  tail call void @llvm.hexagon.V6.vS32b.nqpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
+  %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
+  tail call void @llvm.hexagon.V6.vS32b.nqpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
   ret void
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nqpred.ai(<512 x i1>, i8*, <16 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nqpred.ai(<64 x i1>, i8*, <16 x i32>) #0
 
 ; CHECK-LABEL: f2:
 ; CHECK: if (q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
 
 define void @f2(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
 b0:
-  %v0 = bitcast <16 x i32> %a0 to <512 x i1>
-  tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
+  %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
+  tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
   ret void
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<512 x i1>, i8*, <16 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<64 x i1>, i8*, <16 x i32>) #0
 
 ; CHECK-LABEL: f3:
 ; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
 
 define void @f3(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
 b0:
-  %v0 = bitcast <16 x i32> %a0 to <512 x i1>
-  tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
+  %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
+  tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
   ret void
 }
 
-; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<512 x i1>, i8*, <16 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<64 x i1>, i8*, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
 
 attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll b/llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll
index f22ad09bae8f..b047e3801345 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll
@@ -6,29 +6,34 @@
 ; CHECK: v{{[0-9]+}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
 define inreg <32 x i32> @f0(<32 x i32> %a0, <32 x i32> %a1, i8* nocapture readonly %a2) #0 {
 b0:
-  %v0 = bitcast i8* %a2 to <1024 x i1>*
-  %v1 = load <1024 x i1>, <1024 x i1>* %v0, align 128
-  %v2 = tail call { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <1024 x i1> %v1)
-  %v3 = extractvalue { <32 x i32>, <1024 x i1> } %v2, 0
-  ret <32 x i32> %v3
+  %v0 = bitcast i8* %a2 to <32 x i32>*
+  %v1 = load <32 x i32>, <32 x i32>* %v0, align 128
+  %v2 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %v1, i32 -1)
+  %v3 = tail call { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <128 x i1> %v2)
+  %v4 = extractvalue { <32 x i32>, <128 x i1> } %v3, 0
+  ret <32 x i32> %v4
 }
 
 ; CHECK-LABEL: f1:
 ; CHECK: v{{[0-9]+}}.w = vsub(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
 define inreg <32 x i32> @f1(<32 x i32> %a0, <32 x i32> %a1, i8* nocapture readonly %a2) #0 {
 b0:
-  %v0 = bitcast i8* %a2 to <1024 x i1>*
-  %v1 = load <1024 x i1>, <1024 x i1>* %v0, align 128
-  %v2 = tail call { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <1024 x i1> %v1)
-  %v3 = extractvalue { <32 x i32>, <1024 x i1> } %v2, 0
-  ret <32 x i32> %v3
+  %v0 = bitcast i8* %a2 to <32 x i32>*
+  %v1 = load <32 x i32>, <32 x i32>* %v0, align 128
+  %v2 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %v1, i32 -1)
+  %v3 = tail call { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <128 x i1> %v2)
+  %v4 = extractvalue { <32 x i32>, <128 x i1> } %v3, 0
+  ret <32 x i32> %v4
 }
 
 ; Function Attrs: nounwind readnone
-declare { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32>, <32 x i32>, <1024 x i1>) #1
+declare { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32>, <32 x i32>, <128 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32>, <32 x i32>, <1024 x i1>) #1
+declare { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32>, <32 x i32>, <128 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #1
 
 attributes #0 = { nounwind "target-cpu"="hexagonv65" "target-features"="+hvxv65,+hvx-length128b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/hvx-dual-output.ll b/llvm/test/CodeGen/Hexagon/hvx-dual-output.ll
index f4d3e59fa95d..cb859aa809e7 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-dual-output.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-dual-output.ll
@@ -6,29 +6,34 @@
 ; CHECK: v{{[0-9]+}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
 define inreg <16 x i32> @f0(<16 x i32> %a0, <16 x i32> %a1, i8* nocapture readonly %a2) #0 {
 b0:
-  %v0 = bitcast i8* %a2 to <512 x i1>*
-  %v1 = load <512 x i1>, <512 x i1>* %v0, align 64
-  %v2 = tail call { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a0, <16 x i32> %a1, <512 x i1> %v1)
-  %v3 = extractvalue { <16 x i32>, <512 x i1> } %v2, 0
-  ret <16 x i32> %v3
+  %v0 = bitcast i8* %a2 to <16 x i32>*
+  %v1 = load <16 x i32>, <16 x i32>* %v0, align 64
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v1, i32 -1)
+  %v3 = tail call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a0, <16 x i32> %a1, <64 x i1> %v2)
+  %v4 = extractvalue { <16 x i32>, <64 x i1> } %v3, 0
+  ret <16 x i32> %v4
 }
 
 ; CHECK-LABEL: f1:
 ; CHECK: v{{[0-9]+}}.w = vsub(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
 define inreg <16 x i32> @f1(<16 x i32> %a0, <16 x i32> %a1, i8* nocapture readonly %a2) #0 {
 b0:
-  %v0 = bitcast i8* %a2 to <512 x i1>*
-  %v1 = load <512 x i1>, <512 x i1>* %v0, align 64
-  %v2 = tail call { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32> %a0, <16 x i32> %a1, <512 x i1> %v1)
-  %v3 = extractvalue { <16 x i32>, <512 x i1> } %v2, 0
-  ret <16 x i32> %v3
+  %v0 = bitcast i8* %a2 to <16 x i32>*
+  %v1 = load <16 x i32>, <16 x i32>* %v0, align 64
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v1, i32 -1)
+  %v3 = tail call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32> %a0, <16 x i32> %a1, <64 x i1> %v2)
+  %v4 = extractvalue { <16 x i32>, <64 x i1> } %v3, 0
+  ret <16 x i32> %v4
 }
 
 ; Function Attrs: nounwind readnone
-declare { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32>, <16 x i32>, <512 x i1>) #1
+declare { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32>, <16 x i32>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32>, <16 x i32>, <512 x i1>) #1
+declare { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32>, <16 x i32>, <64 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
 
 attributes #0 = { nounwind "target-cpu"="hexagonv65" "target-features"="+hvxv65,+hvx-length64b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/inline-asm-qv.ll b/llvm/test/CodeGen/Hexagon/inline-asm-qv.ll
index 26f4ac0bd038..5e9d2239edd6 100644
--- a/llvm/test/CodeGen/Hexagon/inline-asm-qv.ll
+++ b/llvm/test/CodeGen/Hexagon/inline-asm-qv.ll
@@ -10,10 +10,13 @@ target triple = "hexagon"
 ; Function Attrs: nounwind
 define void @foo(<16 x i32> %v0, <16 x i32> %v1, <16 x i32>* nocapture %p) #0 {
 entry:
-  %0 = tail call <16 x i32> asm "$0 = vgtw($1.w,$2.w)", "=q,v,v"(<16 x i32> %v0, <16 x i32> %v1) #1
-  store <16 x i32> %0, <16 x i32>* %p, align 64
+  %0 = tail call <64 x i1> asm "$0 = vgtw($1.w,$2.w)", "=q,v,v"(<16 x i32> %v0, <16 x i32> %v1) #1
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1) #1
+  store <16 x i32> %1, <16 x i32>* %p, align 64
   ret void
 }
 
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll b/llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll
index 7d2f50ed58a4..89ab13ada40b 100644
--- a/llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll
+++ b/llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll
@@ -8,7 +8,7 @@ target triple = "hexagon"
 ; CHECK-LABEL: fred
 ; CHECK: if (q{{[0-3]}}) vmem
 define void @fred() #0 {
-  tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<32 x i32> undef, <32 x i32>* undef, <32 x i32> undef) #0
+  tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<128 x i1> undef, <32 x i32>* undef, <32 x i32> undef) #0
   ret void
 }
 

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll b/llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll
index cdb6b6fa80a4..ca026ded3f91 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll
@@ -668,8 +668,8 @@ entry:
 ; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.b += v{{[0-9]+}}.b
 define <16 x i32> @test84(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -677,8 +677,8 @@ entry:
 ; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.h += v{{[0-9]+}}.h
 define <16 x i32> @test85(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -686,8 +686,8 @@ entry:
 ; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.w += v{{[0-9]+}}.w
 define <16 x i32> @test86(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -695,8 +695,8 @@ entry:
 ; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.b += v{{[0-9]+}}.b
 define <16 x i32> @test87(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -704,8 +704,8 @@ entry:
 ; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.h += v{{[0-9]+}}.h
 define <16 x i32> @test88(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -713,8 +713,8 @@ entry:
 ; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.w += v{{[0-9]+}}.w
 define <16 x i32> @test89(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -722,8 +722,8 @@ entry:
 ; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.b -= v{{[0-9]+}}.b
 define <16 x i32> @test90(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -731,8 +731,8 @@ entry:
 ; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.h -= v{{[0-9]+}}.h
 define <16 x i32> @test91(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -740,8 +740,8 @@ entry:
 ; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.w -= v{{[0-9]+}}.w
 define <16 x i32> @test92(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -749,8 +749,8 @@ entry:
 ; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.b -= v{{[0-9]+}}.b
 define <16 x i32> @test93(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -758,8 +758,8 @@ entry:
 ; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.h -= v{{[0-9]+}}.h
 define <16 x i32> @test94(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -767,8 +767,8 @@ entry:
 ; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.w -= v{{[0-9]+}}.w
 define <16 x i32> @test95(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
   ret <16 x i32> %1
 }
 
@@ -999,18 +999,18 @@ declare <16 x i32> @llvm.hexagon.V6.vxor(<16 x i32>, <16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32>, <16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.vaddubsat(<16 x i32>, <16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.vabsh.sat(<16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32>) #0
@@ -1029,6 +1029,7 @@ declare <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32>) #0
 declare <32 x i32> @llvm.hexagon.V6.vsb(<16 x i32>) #0
 declare <32 x i32> @llvm.hexagon.V6.vsh(<16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
 
 attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
 

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll b/llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
index 2281f46b8518..62d2ec177303 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
@@ -181,8 +181,8 @@ entry:
 ; CHECK: v{{[0-9]+}} = vmux(q{{[0-3]+}},v{{[0-9]+}},v{{[0-9]+}})
 define void @test20(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %0, <16 x i32> %b, <16 x i32> %c)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %0, <16 x i32> %b, <16 x i32> %c)
   store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
@@ -191,10 +191,11 @@ entry:
 ; CHECK: q{{[0-3]+}} = and(q{{[0-3]+}},q{{[0-3]+}})
 define void @test21(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = bitcast <16 x i32> %b to <512 x i1>
-  %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %0, <512 x i1> %1)
-  store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %0, <64 x i1> %1)
+  %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+  store <16 x i32> %3, <16 x i32>* @h, align 64
   ret void
 }
 
@@ -202,10 +203,11 @@ entry:
 ; CHECK: q{{[0-3]+}} = or(q{{[0-3]+}},q{{[0-3]+}})
 define void @test22(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = bitcast <16 x i32> %b to <512 x i1>
-  %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %0, <512 x i1> %1)
-  store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %0, <64 x i1> %1)
+  %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+  store <16 x i32> %3, <16 x i32>* @h, align 64
   ret void
 }
 
@@ -213,9 +215,10 @@ entry:
 ; CHECK: q{{[0-3]+}} = not(q{{[0-3]+}})
 define void @test23(<16 x i32> %a) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %0)
-  store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %0)
+  %2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %1, i32 -1)
+  store <16 x i32> %2, <16 x i32>* @h, align 64
   ret void
 }
 
@@ -223,10 +226,11 @@ entry:
 ; CHECK: q{{[0-3]+}} = xor(q{{[0-3]+}},q{{[0-3]+}})
 define void @test24(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = bitcast <16 x i32> %b to <512 x i1>
-  %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1> %0, <512 x i1> %1)
-  store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1> %0, <64 x i1> %1)
+  %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+  store <16 x i32> %3, <16 x i32>* @h, align 64
   ret void
 }
 
@@ -234,10 +238,11 @@ entry:
 ; CHECK: q{{[0-3]+}} = or(q{{[0-3]+}},!q{{[0-3]+}})
 define void @test25(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = bitcast <16 x i32> %b to <512 x i1>
-  %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1> %0, <512 x i1> %1)
-  store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1> %0, <64 x i1> %1)
+  %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+  store <16 x i32> %3, <16 x i32>* @h, align 64
   ret void
 }
 
@@ -245,10 +250,11 @@ entry:
 ; CHECK: q{{[0-3]+}} = and(q{{[0-3]+}},!q{{[0-3]+}})
 define void @test26(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = bitcast <16 x i32> %b to <512 x i1>
-  %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %0, <512 x i1> %1)
-  store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %0, <64 x i1> %1)
+  %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+  store <16 x i32> %3, <16 x i32>* @h, align 64
   ret void
 }
 
@@ -256,8 +262,9 @@ entry:
 ; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
 define void @test27(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
-  store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+  store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -265,8 +272,9 @@ entry:
 ; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
 define void @test28(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = tail call <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %a, <16 x i32> %b)
-  store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %a, <16 x i32> %b)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+  store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -274,8 +282,9 @@ entry:
 ; CHECK: q{{[0-3]+}} = vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
 define void @test29(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = tail call <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %a, <16 x i32> %b)
-  store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %a, <16 x i32> %b)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+  store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -283,8 +292,9 @@ entry:
 ; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
 define void @test30(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
-  store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+  store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -292,8 +302,9 @@ entry:
 ; CHECK: q{{[0-3]+}} = vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
 define void @test31(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = tail call <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %a, <16 x i32> %b)
-  store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %a, <16 x i32> %b)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+  store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -301,8 +312,9 @@ entry:
 ; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
 define void @test32(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %a, <16 x i32> %b)
-  store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %a, <16 x i32> %b)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+  store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -310,8 +322,8 @@ entry:
 ; CHECK: v{{[0-9]+}} |= vand(q{{[0-3]+}},r{{[0-9]+}})
 define void @test33(<16 x i32> %a, <16 x i32> %b, i32 %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %b to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %a, <512 x i1> %0, i32 %c)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %a, <64 x i1> %0, i32 %c)
   store <16 x i32> %1, <16 x i32>* @h, align 64
   ret void
 }
@@ -320,9 +332,10 @@ entry:
 ; CHECK: q{{[0-3]+}} |= vand(v{{[0-9]+}},r{{[0-9]+}})
 define void @test34(<16 x i32> %a, <16 x i32> %b, i32 %c) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %0, <16 x i32> %b, i32 %c)
-  store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %0, <16 x i32> %b, i32 %c)
+  %2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %1, i32 -1)
+  store <16 x i32> %2, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -330,8 +343,8 @@ entry:
 ; CHECK: v{{[0-9]+}} = vand(q{{[0-3]+}},r{{[0-9]+}})
 define void @test35(<16 x i32> %a, i32 %b) #0 {
 entry:
-  %0 = bitcast <16 x i32> %a to <512 x i1>
-  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %0, i32 %b)
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 %b)
   store <16 x i32> %1, <16 x i32>* @h, align 64
   ret void
 }
@@ -340,8 +353,9 @@ entry:
 ; CHECK: q{{[0-3]+}} = vand(v{{[0-9]+}},r{{[0-9]+}})
 define void @test36(<16 x i32> %a, i32 %b) #0 {
 entry:
-  %0 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 %b)
-  store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 %b)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+  store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -476,8 +490,9 @@ entry:
 ; CHECK: q{{[0-3]}} = vsetq(r{{[0-9]+}})
 define void @test51(i32 %a) #0 {
 entry:
-  %0 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a)
-  store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+  %0 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a)
+  %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+  store <16 x i32> %1, <16 x i32>* @k, align 64
   ret void
 }
 
@@ -546,23 +561,23 @@ declare <32 x i32> @llvm.hexagon.V6.vunpackob(<32 x i32>, <16 x i32>) #0
 declare <32 x i32> @llvm.hexagon.V6.vunpackoh(<32 x i32>, <16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #0
 declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #0
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #0
-declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #0
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #0
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #0
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
 declare i64 @llvm.hexagon.S6.rol.i.p(i64, i32) #0
 declare i64 @llvm.hexagon.S6.rol.i.p.acc(i64, i64, i32) #0
 declare i64 @llvm.hexagon.S6.rol.i.p.and(i64, i64, i32) #0
@@ -577,7 +592,7 @@ declare i32 @llvm.hexagon.S6.rol.i.r.or(i32, i32, i32) #0
 declare i32 @llvm.hexagon.S6.rol.i.r.xacc(i32, i32, i32) #0
 declare i32 @llvm.hexagon.V6.extractw(<16 x i32>, i32) #0
 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #0
 declare <16 x i32> @llvm.hexagon.V6.vlutvvb(<16 x i32>, <16 x i32>, i32) #0
 declare <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32>, <16 x i32>, i32) #0
 declare <16 x i32> @llvm.hexagon.V6.vlutvvb.oracc(<16 x i32>, <16 x i32>, <16 x i32>, i32) #0

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll b/llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll
index 588b0270902d..a3319b92164b 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll
@@ -1,15 +1,16 @@
 ; RUN: llc -march=hexagon < %s | FileCheck %s
 
-@d = external global <16 x i32>
+@d = external global <16 x i32>, align 64
 
 ; CHECK-LABEL: test1:
 ; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
 define void @test1(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -17,10 +18,11 @@ entry:
 ; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
 define void @test2(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -28,10 +30,11 @@ entry:
 ; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
 define void @test3(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -39,10 +42,11 @@ entry:
 ; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
 define void @test4(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -50,10 +54,11 @@ entry:
 ; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
 define void @test5(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -61,10 +66,11 @@ entry:
 ; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
 define void @test6(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -72,10 +78,11 @@ entry:
 ; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
 define void @test7(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -83,10 +90,11 @@ entry:
 ; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
 define void @test8(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -94,10 +102,11 @@ entry:
 ; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
 define void @test9(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -105,10 +114,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
 define void @test10(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -116,10 +126,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
 define void @test11(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -127,10 +138,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
 define void @test12(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -138,10 +150,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
 define void @test13(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -149,10 +162,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
 define void @test14(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -160,10 +174,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
 define void @test15(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -171,10 +186,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
 define void @test16(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -182,10 +198,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
 define void @test17(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -193,10 +210,11 @@ entry:
 ; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
 define void @test18(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -204,10 +222,11 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
 define void @test19(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -215,10 +234,11 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
 define void @test20(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -226,10 +246,11 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
 define void @test21(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -237,10 +258,11 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
 define void @test22(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -248,10 +270,11 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
 define void @test23(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -259,10 +282,11 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
 define void @test24(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -270,10 +294,11 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
 define void @test25(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -281,10 +306,11 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
 define void @test26(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
@@ -292,39 +318,42 @@ entry:
 ; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
 define void @test27(<16 x i32> %a, <16 x i32> %b) #0 {
 entry:
-  %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
-  %2 = bitcast <512 x i1> %1 to <16 x i32>
-  store <16 x i32> %2, <16 x i32>* @d, align 64
+  %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+  store <16 x i32> %v3, <16 x i32>* @d, align 64
   ret void
 }
 
-declare <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
 
 attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll b/llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
index 3b853ebb444b..a9defbf11e26 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
@@ -12,30 +12,32 @@
 ; CHECK-LABEL: V6_vmaskedstorentnq_128B
 ; CHECK: if (!q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
 
-declare void @llvm.hexagon.V6.vmaskedstoreq.128B(<1024 x i1>, i8*, <32 x i32>)
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
+
+declare void @llvm.hexagon.V6.vmaskedstoreq.128B(<128 x i1>, i8*, <32 x i32>)
 define void @V6_vmaskedstoreq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
-  %1 = bitcast <32 x i32> %a to <1024 x i1>
-  call void @llvm.hexagon.V6.vmaskedstoreq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vmaskedstoreq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vmaskedstorenq.128B(<1024 x i1>, i8*, <32 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorenq.128B(<128 x i1>, i8*, <32 x i32>)
 define void @V6_vmaskedstorenq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
-  %1 = bitcast <32 x i32> %a to <1024 x i1>
-  call void @llvm.hexagon.V6.vmaskedstorenq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vmaskedstorenq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vmaskedstorentq.128B(<1024 x i1>, i8*, <32 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorentq.128B(<128 x i1>, i8*, <32 x i32>)
 define void @V6_vmaskedstorentq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
-  %1 = bitcast <32 x i32> %a to <1024 x i1>
-  call void @llvm.hexagon.V6.vmaskedstorentq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vmaskedstorentq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vmaskedstorentnq.128B(<1024 x i1>, i8*, <32 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorentnq.128B(<128 x i1>, i8*, <32 x i32>)
 define void @V6_vmaskedstorentnq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
-  %1 = bitcast <32 x i32> %a to <1024 x i1>
-  call void @llvm.hexagon.V6.vmaskedstorentnq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vmaskedstorentnq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
   ret void
 }

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll b/llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
index 5ff672224529..2aacaeae44b3 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
@@ -12,30 +12,32 @@
 ; CHECK-LABEL: V6_vmaskedstorentnq
 ; CHECK: if (!q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
 
-declare void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1>, i8*, <16 x i32>)
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
+
+declare void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1>, i8*, <16 x i32>)
 define void @V6_vmaskedstoreq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
-  %1 = bitcast <16 x i32> %a to <512 x i1>
-  call void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1> %1, i8* %b, <16 x i32> %c)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1> %1, i8* %b, <16 x i32> %c)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vmaskedstorenq(<512 x i1>, i8*, <16 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorenq(<64 x i1>, i8*, <16 x i32>)
 define void @V6_vmaskedstorenq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
-  %1 = bitcast <16 x i32> %a to <512 x i1>
-  call void @llvm.hexagon.V6.vmaskedstorenq(<512 x i1> %1, i8* %b, <16 x i32> %c)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vmaskedstorenq(<64 x i1> %1, i8* %b, <16 x i32> %c)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vmaskedstorentq(<512 x i1>, i8*, <16 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorentq(<64 x i1>, i8*, <16 x i32>)
 define void @V6_vmaskedstorentq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
-  %1 = bitcast <16 x i32> %a to <512 x i1>
-  call void @llvm.hexagon.V6.vmaskedstorentq(<512 x i1> %1, i8* %b, <16 x i32> %c)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vmaskedstorentq(<64 x i1> %1, i8* %b, <16 x i32> %c)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vmaskedstorentnq(<512 x i1>, i8*, <16 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorentnq(<64 x i1>, i8*, <16 x i32>)
 define void @V6_vmaskedstorentnq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
-  %1 = bitcast <16 x i32> %a to <512 x i1>
-  call void @llvm.hexagon.V6.vmaskedstorentnq(<512 x i1> %1, i8* %b, <16 x i32> %c)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vmaskedstorentnq(<64 x i1> %1, i8* %b, <16 x i32> %c)
   ret void
 }

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
index 453f690f89f3..c54cd95daf78 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
@@ -19,6 +19,8 @@
 ; CHECK: if (q{{[0-3]+}}) vtmp.h = vgather(r1,m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h
 ; CHECK: vmem(r{{[0-9]+}}+#0) = vtmp.new
 
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
+
 declare void @llvm.hexagon.V6.vgathermw.128B(i8*, i32, i32, <32 x i32>)
 define void @V6_vgathermw_128B(i8* %a, i32 %b, i32 %c, <32 x i32> %d) {
   call void @llvm.hexagon.V6.vgathermw.128B(i8* %a, i32 %b, i32 %c, <32 x i32> %d)
@@ -37,24 +39,24 @@ define void @V6_vgathermhw_128B(i8* %a, i32 %b, i32 %c, <64 x i32> %d) {
   ret void
 }
 
-declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <1024 x i1>, i32, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>)
 define void @V6_vgathermwq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
-  %1 = bitcast <32 x i32> %b to <1024 x i1>
-  call void @llvm.hexagon.V6.vgathermwq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
+  call void @llvm.hexagon.V6.vgathermwq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <1024 x i1>, i32, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>)
 define void @V6_vgathermhq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
-  %1 = bitcast <32 x i32> %b to <1024 x i1>
-  call void @llvm.hexagon.V6.vgathermhq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
+  call void @llvm.hexagon.V6.vgathermhq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <1024 x i1>, i32, i32, <64 x i32>)
+declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <128 x i1>, i32, i32, <64 x i32>)
 define void @V6_vgathermhwq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <64 x i32> %e) {
-  %1 = bitcast <32 x i32> %b to <1024 x i1>
-  call void @llvm.hexagon.V6.vgathermhwq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <64 x i32> %e)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
+  call void @llvm.hexagon.V6.vgathermhwq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <64 x i32> %e)
   ret void
 }
 

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
index bc8591527c0d..c3a3b15ea1be 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
@@ -19,6 +19,8 @@
 ; CHECK: if (q{{[0-3]+}}) vtmp.h = vgather(r1,m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h
 ; CHECK: vmem(r{{[0-9]+}}+#0) = vtmp.new
 
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
+
 declare void @llvm.hexagon.V6.vgathermw(i8*, i32, i32, <16 x i32>)
 define void @V6_vgathermw(i8* %a, i32 %b, i32 %c, <16 x i32> %d) {
   call void @llvm.hexagon.V6.vgathermw(i8* %a, i32 %b, i32 %c, <16 x i32> %d)
@@ -37,23 +39,23 @@ define void @V6_vgathermhw(i8* %a, i32 %b, i32 %c, <32 x i32> %d) {
   ret void
 }
 
-declare void @llvm.hexagon.V6.vgathermwq(i8*, <512 x i1>, i32, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vgathermwq(i8*, <64 x i1>, i32, i32, <16 x i32>)
 define void @V6_vgathermwq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <16 x i32> %e) {
-  %1 = bitcast <16 x i32> %b to <512 x i1>
-  call void @llvm.hexagon.V6.vgathermwq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  call void @llvm.hexagon.V6.vgathermwq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vgathermhq(i8*, <512 x i1>, i32, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vgathermhq(i8*, <64 x i1>, i32, i32, <16 x i32>)
 define void @V6_vgathermhq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <16 x i32> %e) {
-  %1 = bitcast <16 x i32> %b to <512 x i1>
-  call void @llvm.hexagon.V6.vgathermhq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  call void @llvm.hexagon.V6.vgathermhq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vgathermhwq(i8*, <512 x i1>, i32, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vgathermhwq(i8*, <64 x i1>, i32, i32, <32 x i32>)
 define void @V6_vgathermhwq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
-  %1 = bitcast <16 x i32> %b to <512 x i1>
-  call void @llvm.hexagon.V6.vgathermhwq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+  call void @llvm.hexagon.V6.vgathermhwq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
   ret void
 }

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
index 40366fa3af1d..18ae2bac611f 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
@@ -19,6 +19,7 @@
 ; CHECK-LABEL: V6_vscattermhwq_128B
 ; CHECK: if (q{{[0-3]}}) vscatter(r{{[0-9]+}},m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h = v{{[0-9]+}}
 
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
 
 declare void @llvm.hexagon.V6.vscattermw.128B(i32, i32, <32 x i32>, <32 x i32>)
 define void @V6_vscattermw_128B(i32 %a, i32 %b, <32 x i32> %c, <32 x i32> %d) {
@@ -44,17 +45,17 @@ define void @V6_vscattermh_add_128B(i32 %a, i32 %b, <32 x i32> %c, <32 x i32> %d
   ret void
 }
 
-declare void @llvm.hexagon.V6.vscattermwq.128B(<1024 x i1>, i32, i32, <32 x i32>, <32 x i32>)
+declare void @llvm.hexagon.V6.vscattermwq.128B(<128 x i1>, i32, i32, <32 x i32>, <32 x i32>)
 define void @V6_vscattermwq_128B(<32 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e) {
-  %1 = bitcast <32 x i32> %a to <1024 x i1>
-  call void @llvm.hexagon.V6.vscattermwq.128B(<1024 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vscattermwq.128B(<128 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vscattermhq.128B(<1024 x i1>, i32, i32, <32 x i32>, <32 x i32>)
+declare void @llvm.hexagon.V6.vscattermhq.128B(<128 x i1>, i32, i32, <32 x i32>, <32 x i32>)
 define void @V6_vscattermhq_128B(<32 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e) {
-  %1 = bitcast <32 x i32> %a to <1024 x i1>
-  call void @llvm.hexagon.V6.vscattermhq.128B(<1024 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vscattermhq.128B(<128 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
   ret void
 }
 
@@ -70,9 +71,9 @@ define void @V6_vscattermhw_add_128B(i32 %a, i32 %b, <64 x i32> %c, <32 x i32> %
   ret void
 }
 
-declare void @llvm.hexagon.V6.vscattermhwq.128B(<1024 x i1>, i32, i32, <64 x i32>, <32 x i32>)
+declare void @llvm.hexagon.V6.vscattermhwq.128B(<128 x i1>, i32, i32, <64 x i32>, <32 x i32>)
 define void @V6_vscattermhwq_128B(<32 x i32> %a, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e) {
-  %1 = bitcast <32 x i32> %a to <1024 x i1>
-  call void @llvm.hexagon.V6.vscattermhwq.128B(<1024 x i1> %1, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e)
+  %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vscattermhwq.128B(<128 x i1> %1, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e)
   ret void
 }

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
index 405211c5dfac..1a61ee8b9c62 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
@@ -19,6 +19,7 @@
 ; CHECK-LABEL: V6_vscattermhwq
 ; CHECK: if (q{{[0-3]}}) vscatter(r{{[0-9]+}},m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h = v{{[0-9]+}}
 
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
 
 declare void @llvm.hexagon.V6.vscattermw(i32, i32, <16 x i32>, <16 x i32>)
 define void @V6_vscattermw(i32 %a, i32 %b, <16 x i32> %c, <16 x i32> %d) {
@@ -44,17 +45,17 @@ define void @V6_vscattermh_add(i32 %a, i32 %b, <16 x i32> %c, <16 x i32> %d) {
   ret void
 }
 
-declare void @llvm.hexagon.V6.vscattermwq(<512 x i1>, i32, i32, <16 x i32>, <16 x i32>)
+declare void @llvm.hexagon.V6.vscattermwq(<64 x i1>, i32, i32, <16 x i32>, <16 x i32>)
 define void @V6_vscattermwq(<16 x i32> %a, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e) {
-  %1 = bitcast <16 x i32> %a to <512 x i1>
-  call void @llvm.hexagon.V6.vscattermwq(<512 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vscattermwq(<64 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
   ret void
 }
 
-declare void @llvm.hexagon.V6.vscattermhq(<512 x i1>, i32, i32, <16 x i32>, <16 x i32>)
+declare void @llvm.hexagon.V6.vscattermhq(<64 x i1>, i32, i32, <16 x i32>, <16 x i32>)
 define void @V6_vscattermhq(<16 x i32> %a, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e) {
-  %1 = bitcast <16 x i32> %a to <512 x i1>
-  call void @llvm.hexagon.V6.vscattermhq(<512 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vscattermhq(<64 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
   ret void
 }
 
@@ -70,9 +71,9 @@ define void @V6_vscattermhw_add(i32 %a, i32 %b, <32 x i32> %c, <16 x i32> %d) {
   ret void
 }
 
-declare void @llvm.hexagon.V6.vscattermhwq(<512 x i1>, i32, i32, <32 x i32>, <16 x i32>)
+declare void @llvm.hexagon.V6.vscattermhwq(<64 x i1>, i32, i32, <32 x i32>, <16 x i32>)
 define void @V6_vscattermhwq(<16 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e) {
-  %1 = bitcast <16 x i32> %a to <512 x i1>
-  call void @llvm.hexagon.V6.vscattermhwq(<512 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+  call void @llvm.hexagon.V6.vscattermhwq(<64 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e)
   ret void
 }

diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
index 8d503f11800f..85f3c8e7cb16 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
@@ -136,21 +136,21 @@ define <16 x i32> @V6_vmpyuhe(<16 x i32> %a, i32 %b) {
 }
 ; CHECK: = vmpye(v0.uh,r0.uh)
 
-;declare <16 x i32> @llvm.hexagon.V6.vprefixqb(<512 x i1>)
-;define <16 x i32> @V6_vprefixqb(<512 x i1> %a) {
-;  %b = call <16 x i32> @llvm.hexagon.V6.vprefixqb(<512 x i1> %a)
+;declare <16 x i32> @llvm.hexagon.V6.vprefixqb(<64 x i1>)
+;define <16 x i32> @V6_vprefixqb(<64 x i1> %a) {
+;  %b = call <16 x i32> @llvm.hexagon.V6.vprefixqb(<64 x i1> %a)
 ;  ret <16 x i32> %b
 ;}
 
-;declare <16 x i32> @llvm.hexagon.V6.vprefixqh(<512 x i1>)
-;define <16 x i32> @V6_vprefixqh(<512 x i1> %a) {
-;  %b = call <16 x i32> @llvm.hexagon.V6.vprefixqh(<512 x i1> %a)
+;declare <16 x i32> @llvm.hexagon.V6.vprefixqh(<64 x i1>)
+;define <16 x i32> @V6_vprefixqh(<64 x i1> %a) {
+;  %b = call <16 x i32> @llvm.hexagon.V6.vprefixqh(<64 x i1> %a)
 ;  ret <16 x i32> %b
 ;}
 
-;declare <16 x i32> @llvm.hexagon.V6.vprefixqw(<512 x i1>)
-;define <16 x i32> @V6_vprefixqw(<512 x i1> %a) {
-;  %b = call <16 x i32> @llvm.hexagon.V6.vprefixqw(<512 x i1> %a)
+;declare <16 x i32> @llvm.hexagon.V6.vprefixqw(<64 x i1>)
+;define <16 x i32> @V6_vprefixqw(<64 x i1> %a) {
+;  %b = call <16 x i32> @llvm.hexagon.V6.vprefixqw(<64 x i1> %a)
 ;  ret <16 x i32> %b
 ;}
 

diff --git a/llvm/test/CodeGen/Hexagon/late_instr.ll b/llvm/test/CodeGen/Hexagon/late_instr.ll
index c21e0140ca06..7825ef96d2ff 100644
--- a/llvm/test/CodeGen/Hexagon/late_instr.ll
+++ b/llvm/test/CodeGen/Hexagon/late_instr.ll
@@ -28,10 +28,10 @@ b0:
   %v13 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v12)
   %v14 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v13)
   %v15 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v14)
-  %v16 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
+  %v16 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
   %v17 = shl i32 1, %v8
   %v18 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v17)
-  %v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v15, <512 x i1> %v16, i32 %v18)
+  %v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v15, <64 x i1> %v16, i32 %v18)
   %v20 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %a3)
   %v21 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v20)
   %v22 = icmp sgt i32 %v5, 0
@@ -48,8 +48,8 @@ b1:                                               ; preds = %b0
   %v30 = getelementptr inbounds i8, i8* %a0, i32 %v29
   %v31 = bitcast i8* %v30 to <16 x i32>*
   %v32 = load <16 x i32>, <16 x i32>* %v31, align 64, !tbaa !0
-  %v33 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
-  %v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v33, i32 16843009)
+  %v33 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
+  %v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v33, i32 16843009)
   %v35 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v34)
   %v36 = add i32 %v0, %a5
   %v37 = getelementptr inbounds i8, i8* %a0, i32 %v36
@@ -127,11 +127,11 @@ b4:                                               ; preds = %b4, %b3
   %v100 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v94, <16 x i32> %v91)
   %v101 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v97, <16 x i32> %v99)
   %v102 = tail call <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32> %v98, <16 x i32> %v100)
-  %v103 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101, <16 x i32> %v96)
-  %v104 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95, <16 x i32> %v102)
-  %v105 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %v103, <512 x i1> %v104)
+  %v103 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101, <16 x i32> %v96)
+  %v104 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95, <16 x i32> %v102)
+  %v105 = tail call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %v103, <64 x i1> %v104)
   %v106 = tail call i32 @llvm.hexagon.S6.rol.i.r(i32 %v83, i32 1)
-  %v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v86, <512 x i1> %v105, i32 %v106)
+  %v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v86, <64 x i1> %v105, i32 %v106)
   %v108 = add nsw i32 %v79, -64
   %v109 = icmp sgt i32 %v79, 64
   br i1 %v109, label %b4, label %b5
@@ -179,16 +179,16 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
 declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
 
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.hexagon.S2.vsplatrb(i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vd0() #1
@@ -212,10 +212,10 @@ declare <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32>, <16 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.hexagon.S6.rol.i.r(i32, i32) #1

diff --git a/llvm/test/CodeGen/Hexagon/peephole-move-phi.ll b/llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
index e161f075530e..906c5bfe4033 100644
--- a/llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
+++ b/llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
@@ -15,35 +15,35 @@ b0:
   br i1 %v0, label %b1, label %b2
 
 b1:                                               ; preds = %b0
-  %v1 = tail call <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1> undef) #2
+  %v1 = tail call <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1> undef) #2
   br label %b2
 
 b2:                                               ; preds = %b1, %b0
-  %v2 = phi <1024 x i1> [ %v1, %b1 ], [ undef, %b0 ]
+  %v2 = phi <128 x i1> [ %v1, %b1 ], [ undef, %b0 ]
   br label %b3
 
 b3:                                               ; preds = %b3, %b2
-  %v3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v2, <32 x i32> undef, <32 x i32> undef) #2
+  %v3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v2, <32 x i32> undef, <32 x i32> undef) #2
   %v4 = tail call <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32> undef, <32 x i32> %v3) #2
   %v5 = tail call <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32> %v4, <32 x i32> undef) #2
-  %v6 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v5, <32 x i32> undef) #2
-  %v7 = tail call <1024 x i1> @llvm.hexagon.V6.pred.or.128B(<1024 x i1> %v6, <1024 x i1> undef) #2
-  %v8 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v7, <32 x i32> undef, <32 x i32> undef) #2
-  tail call void asm sideeffect "if($0) vmem($1)=$2;", "q,r,v,~{memory}"(<32 x i32> undef, <32 x i32>* undef, <32 x i32> %v8) #2
+  %v6 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v5, <32 x i32> undef) #2
+  %v7 = tail call <128 x i1> @llvm.hexagon.V6.pred.or.128B(<128 x i1> %v6, <128 x i1> undef) #2
+  %v8 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v7, <32 x i32> undef, <32 x i32> undef) #2
+  tail call void asm sideeffect "if($0) vmem($1)=$2;", "q,r,v,~{memory}"(<128 x i1> undef, <32 x i32>* undef, <32 x i32> %v8) #2
   br label %b3
 }
 
 ; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.pred.or.128B(<1024 x i1>, <1024 x i1>) #1
+declare <128 x i1> @llvm.hexagon.V6.pred.or.128B(<128 x i1>, <128 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1>) #1
+declare <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1>) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32>, <32 x i32>) #1

diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
index e051303e02ff..3eb0c5e74725 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
@@ -25,36 +25,36 @@ b3:                                               ; preds = %b3, %b2
   %v7 = load <16 x i32>, <16 x i32>* %v6, align 64, !tbaa !0
   %v8 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> undef, <16 x i32> %v7, i32 4)
   %v9 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v8, <16 x i32> zeroinitializer)
-  %v10 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v9, <16 x i32> undef)
-  %v11 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v10, <16 x i32> undef, <16 x i32> undef)
-  %v12 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> zeroinitializer, <16 x i32> %v11, <16 x i32> undef)
-  %v13 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> undef, <16 x i32> %v12, <16 x i32> undef)
+  %v10 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v9, <16 x i32> undef)
+  %v11 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v10, <16 x i32> undef, <16 x i32> undef)
+  %v12 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> zeroinitializer, <16 x i32> %v11, <16 x i32> undef)
+  %v13 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> undef, <16 x i32> %v12, <16 x i32> undef)
   %v14 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> undef, i32 1)
   %v15 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v14, <16 x i32> zeroinitializer)
   %v16 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> zeroinitializer, <16 x i32> zeroinitializer)
-  %v17 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
-  %v18 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v15, <16 x i32> undef)
-  %v19 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
-  %v20 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v16, <16 x i32> undef)
-  %v21 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v19, <16 x i32> undef, <16 x i32> zeroinitializer)
-  %v22 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v20, <16 x i32> undef, <16 x i32> zeroinitializer)
+  %v17 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
+  %v18 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v15, <16 x i32> undef)
+  %v19 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
+  %v20 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v16, <16 x i32> undef)
+  %v21 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v19, <16 x i32> undef, <16 x i32> zeroinitializer)
+  %v22 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v20, <16 x i32> undef, <16 x i32> zeroinitializer)
   %v23 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v22, <16 x i32> %v21)
   %v24 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> zeroinitializer, <32 x i32> %v23, i32 16843009)
-  %v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v17, <16 x i32> %v13, <16 x i32> undef)
-  %v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v18, <16 x i32> %v25, <16 x i32> undef)
-  %v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v19, <16 x i32> %v26, <16 x i32> undef)
-  %v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v20, <16 x i32> %v27, <16 x i32> undef)
+  %v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v17, <16 x i32> %v13, <16 x i32> undef)
+  %v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v18, <16 x i32> %v25, <16 x i32> undef)
+  %v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v19, <16 x i32> %v26, <16 x i32> undef)
+  %v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v20, <16 x i32> %v27, <16 x i32> undef)
   %v29 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> undef, <16 x i32> zeroinitializer)
   %v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> zeroinitializer, <16 x i32> zeroinitializer)
-  %v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> undef, <16 x i32> undef)
-  %v32 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v29, <16 x i32> undef)
-  %v33 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> undef)
+  %v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> undef, <16 x i32> undef)
+  %v32 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v29, <16 x i32> undef)
+  %v33 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> undef)
   %v34 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v24, <32 x i32> zeroinitializer, i32 16843009)
   %v35 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v34, <32 x i32> undef, i32 16843009)
-  %v36 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> zeroinitializer, <16 x i32> %v28, <16 x i32> undef)
-  %v37 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v36, <16 x i32> undef)
-  %v38 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v32, <16 x i32> %v37, <16 x i32> undef)
-  %v39 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v33, <16 x i32> %v38, <16 x i32> undef)
+  %v36 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> zeroinitializer, <16 x i32> %v28, <16 x i32> undef)
+  %v37 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v36, <16 x i32> undef)
+  %v38 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v32, <16 x i32> %v37, <16 x i32> undef)
+  %v39 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v33, <16 x i32> %v38, <16 x i32> undef)
   %v40 = add nsw i32 %v3, 3
   %v41 = icmp eq i32 %v40, 5
   br i1 %v41, label %b4, label %b3
@@ -85,13 +85,13 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1

diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
index 49e451d54ea1..91fb350daab5 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
@@ -28,13 +28,13 @@ declare i32 @printf(i8*, ...) #0
 declare void @print_vecpred(i32, i8*) #0
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
 
 ; Function Attrs: nounwind
 declare void @init_vectors() #0
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
@@ -43,7 +43,7 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
 declare void @init_addresses() #0
 
 ; Function Attrs: nounwind
-declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind
 define i32 @main() #0 {
@@ -63,13 +63,13 @@ entry:
   %7 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
   %call1381 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str251, i32 0, i32 0)) #3
   %8 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
-  %9 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
+  %9 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
   call void @print_vector(i32 64, i8* bitcast (<16 x i32>* @VectorResult to i8*))
   %10 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
-  %11 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
-  %12 = bitcast <512 x i1> %11 to <16 x i32>
-  %13 = bitcast <16 x i32> %12 to <512 x i1>
-  %14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %13, <16 x i32> undef, <16 x i32> undef)
+  %11 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
+  %12 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %11, i32 -1)
+  %13 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %12, i32 -1)
+  %14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %13, <16 x i32> undef, <16 x i32> undef)
   store <16 x i32> %14, <16 x i32>* @VectorResult, align 64
   ret i32 0
 }

diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
index 641af59a7c53..138b7321086b 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
@@ -114,12 +114,12 @@ b4:                                               ; preds = %b3
   %v91 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v90, i32 1)
   %v92 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v91, i32 1)
   %v93 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v92, i32 1)
-  %v94 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> %v93)
-  %v95 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> %v94, <16 x i32> undef)
+  %v94 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> undef, <16 x i32> undef, <16 x i32> %v93)
+  %v95 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> undef, <16 x i32> %v94, <16 x i32> undef)
   %v96 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> undef, i32 1)
-  %v97 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %v96, <16 x i32> %v95)
-  %v98 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %v97, <16 x i32> undef, <16 x i32> undef)
-  %v99 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> undef)
+  %v97 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %v96, <16 x i32> %v95)
+  %v98 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %v97, <16 x i32> undef, <16 x i32> undef)
+  %v99 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> undef, <16 x i32> undef, <16 x i32> undef)
   %v100 = tail call <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32> %v99, <16 x i32> %v98)
   %v101 = tail call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %v100, <16 x i32> undef)
   %v102 = getelementptr inbounds <16 x i32>, <16 x i32>* %v2, i32 1
@@ -183,13 +183,13 @@ declare <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32>, <32 x i32>) #1
 declare <32 x i32> @llvm.hexagon.V6.vadduhw(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32>, i32) #1

diff --git a/llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll b/llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
index bc878e09ef94..8ffa4659a9dd 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
@@ -82,14 +82,14 @@ entry:
   %asmresult58 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 29
   %asmresult59 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 30
   %asmresult60 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 31
-  %2 = tail call { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } asm "nop", "=q,=q,=q,=q"() #1
-  %asmresult61 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 0
-  %asmresult62 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 1
-  %asmresult63 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 2
-  %asmresult64 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 3
-  %3 = tail call <16 x i32> asm "nop", "=q,q,q,q,q"(<16 x i32> %asmresult61, <16 x i32> %asmresult62, <16 x i32> %asmresult63, <16 x i32> %asmresult64) #1
-  tail call void asm sideeffect "nop", "q,q,q"(<16 x i32> %asmresult61, <16 x i32> %asmresult62, <16 x i32> %asmresult63) #2
-  tail call void asm sideeffect "nop", "q,q"(<16 x i32> %asmresult64, <16 x i32> %3) #2
+  %2 = tail call { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } asm "nop", "=q,=q,=q,=q"() #1
+  %asmresult61 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 0
+  %asmresult62 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 1
+  %asmresult63 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 2
+  %asmresult64 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 3
+  %3 = tail call <64 x i1> asm "nop", "=q,q,q,q,q"(<64 x i1> %asmresult61, <64 x i1> %asmresult62, <64 x i1> %asmresult63, <64 x i1> %asmresult64) #1
+  tail call void asm sideeffect "nop", "q,q,q"(<64 x i1> %asmresult61, <64 x i1> %asmresult62, <64 x i1> %asmresult63) #2
+  tail call void asm sideeffect "nop", "q,q"(<64 x i1> %asmresult64, <64 x i1> %3) #2
   tail call void asm sideeffect "nop", "v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v"(<16 x i32> %asmresult29, <16 x i32> %asmresult30, <16 x i32> %asmresult31, <16 x i32> %asmresult32, <16 x i32> %asmresult33, <16 x i32> %asmresult34, <16 x i32> %asmresult35, <16 x i32> %asmresult36, <16 x i32> %asmresult37, <16 x i32> %asmresult38, <16 x i32> %asmresult39, <16 x i32> %asmresult40, <16 x i32> %asmresult41, <16 x i32> %asmresult42, <16 x i32> %asmresult43, <16 x i32> %asmresult44, <16 x i32> %asmresult45, <16 x i32> %asmresult46, <16 x i32> %asmresult47, <16 x i32> %asmresult48, <16 x i32> %asmresult49, <16 x i32> %asmresult50, <16 x i32> %asmresult51, <16 x i32> %asmresult52, <16 x i32> %asmresult53, <16 x i32> %asmresult54, <16 x i32> %asmresult55, <16 x i32> %asmresult56, <16 x i32> %asmresult57, <16 x i32> %asmresult58, <16 x i32> %asmresult59, <16 x i32> %asmresult60) #2
   tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14, i32 %asmresult15, i32 %asmresult16, i32 %asmresult17, i32 %asmresult18, i32 %asmresult19, i32 %asmresult20, i32 %asmresult21, i32 %asmresult22, i32 %asmresult23, i32 %asmresult24, i32 %asmresult25, i32 %asmresult26, i32 %asmresult27, i32 %asmresult28) #2
   ret void

diff --git a/llvm/test/CodeGen/Hexagon/split-vecpred.ll b/llvm/test/CodeGen/Hexagon/split-vecpred.ll
index 615eb88926c9..c3c0e18b2b26 100644
--- a/llvm/test/CodeGen/Hexagon/split-vecpred.ll
+++ b/llvm/test/CodeGen/Hexagon/split-vecpred.ll
@@ -32,7 +32,7 @@ b7:                                               ; preds = %b6
   br label %b8
 
 b8:                                               ; preds = %b7
-  %v0 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> undef, i32 -1)
+  %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> undef, i32 -1)
   br i1 undef, label %b9, label %b11
 
 b9:                                               ; preds = %b8
@@ -42,9 +42,9 @@ b10:                                              ; preds = %b12
   br label %b11
 
 b11:                                              ; preds = %b10, %b8
-  %v1 = phi <512 x i1> [ %v0, %b8 ], [ undef, %b10 ]
-  %v2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v1, <512 x i1> undef)
-  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %v2, <16 x i32> undef, <16 x i32> undef)
+  %v1 = phi <64 x i1> [ %v0, %b8 ], [ undef, %b10 ]
+  %v2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v1, <64 x i1> undef)
+  %v3 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %v2, <16 x i32> undef, <16 x i32> undef)
   %v4 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %v3, i32 undef)
   %v5 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v4, <16 x i32> undef, i32 undef)
   %v6 = tail call <16 x i32> @llvm.hexagon.V6.vand(<16 x i32> %v5, <16 x i32> undef)
@@ -53,9 +53,9 @@ b11:                                              ; preds = %b10, %b8
   %v9 = tail call <32 x i32> @llvm.hexagon.V6.vshufoeb(<16 x i32> undef, <16 x i32> %v8)
   %v10 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v9)
   %v11 = tail call <16 x i32> @llvm.hexagon.V6.vor(<16 x i32> %v10, <16 x i32> undef)
-  %v12 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v11, i32 -1)
-  %v13 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v12, i32 undef)
-  tail call void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1> undef, i8* undef, <16 x i32> %v13)
+  %v12 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v11, i32 -1)
+  %v13 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v12, i32 undef)
+  tail call void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1> undef, i8* undef, <16 x i32> %v13)
   unreachable
 
 b12:                                              ; preds = %b12, %b9
@@ -69,22 +69,22 @@ b13:                                              ; preds = %b5
 }
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1>, i8*, <16 x i32>) #2
+declare void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1>, i8*, <16 x i32>) #2
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vor(<16 x i32>, <16 x i32>) #1

diff --git a/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll b/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
index 14b04a1cfe64..b6af6d5a50d7 100644
--- a/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
@@ -25,9 +25,9 @@ b3:                                               ; preds = %b3, %b2
   %v2 = phi i32 [ 0, %b2 ], [ %v8, %b3 ]
   %v3 = phi <32 x i32> [ zeroinitializer, %b2 ], [ %v0, %b3 ]
   %v4 = phi <32 x i32> [ %v1, %b2 ], [ %v7, %b3 ]
-  %v5 = tail call <1024 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %v3, <32 x i32> undef)
-  %v6 = tail call <1024 x i1> @llvm.hexagon.V6.veqh.and.128B(<1024 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
-  %v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<1024 x i1> %v6, <32 x i32> %v4, <32 x i32> undef)
+  %v5 = tail call <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %v3, <32 x i32> undef)
+  %v6 = tail call <128 x i1> @llvm.hexagon.V6.veqh.and.128B(<128 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
+  %v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1> %v6, <32 x i32> %v4, <32 x i32> undef)
   %v8 = add nsw i32 %v2, 1
   %v9 = icmp slt i32 %v8, %a2
   br i1 %v9, label %b3, label %b4
@@ -40,13 +40,13 @@ b5:                                               ; preds = %b4, %b0
 }
 
 ; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.veqh.and.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.veqh.and.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1

diff --git a/llvm/test/CodeGen/Hexagon/swp-sigma.ll b/llvm/test/CodeGen/Hexagon/swp-sigma.ll
index 165174282099..1e376323a32f 100644
--- a/llvm/test/CodeGen/Hexagon/swp-sigma.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-sigma.ll
@@ -17,9 +17,9 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #0
 declare <16 x i32> @llvm.hexagon.V6.vd0() #0
 declare <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32>, <16 x i32>) #0
 declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
 declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #0
 declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32) #0
 declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #0
@@ -106,48 +106,48 @@ b6:                                               ; preds = %b6, %b5
   %v53 = tail call <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32> %v8, <16 x i32> %v47) #2
   %v54 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v45, <16 x i32> %v47) #2
   %v55 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v49, <16 x i32> %v47) #2
-  %v56 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v54, <16 x i32> %v7) #2
-  %v57 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v55, <16 x i32> %v7) #2
-  %v58 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v56, <16 x i32> %v9, <16 x i32> %v10) #2
-  %v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v57, <16 x i32> %v58, <16 x i32> %v9) #2
-  %v60 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v56, <16 x i32> %v8, <16 x i32> %v45) #2
-  %v61 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v57, <16 x i32> %v8, <16 x i32> %v49) #2
+  %v56 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v54, <16 x i32> %v7) #2
+  %v57 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v55, <16 x i32> %v7) #2
+  %v58 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v56, <16 x i32> %v9, <16 x i32> %v10) #2
+  %v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v57, <16 x i32> %v58, <16 x i32> %v9) #2
+  %v60 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v56, <16 x i32> %v8, <16 x i32> %v45) #2
+  %v61 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v57, <16 x i32> %v8, <16 x i32> %v49) #2
   %v62 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v61, <16 x i32> %v60) #2
   %v63 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v53, <32 x i32> %v62, i32 -1) #2
   %v64 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v45, <16 x i32> %v44, i32 1) #2
   %v65 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v49, <16 x i32> %v48, i32 1) #2
   %v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v64, <16 x i32> %v47) #2
   %v67 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v65, <16 x i32> %v47) #2
-  %v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v7) #2
-  %v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v67, <16 x i32> %v7) #2
-  %v70 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v59, <16 x i32> %v9) #2
-  %v71 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v70, <16 x i32> %v9) #2
-  %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v8, <16 x i32> %v64) #2
-  %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v8, <16 x i32> %v65) #2
+  %v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v7) #2
+  %v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v67, <16 x i32> %v7) #2
+  %v70 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v59, <16 x i32> %v9) #2
+  %v71 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v70, <16 x i32> %v9) #2
+  %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v8, <16 x i32> %v64) #2
+  %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v8, <16 x i32> %v65) #2
   %v74 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v73, <16 x i32> %v72) #2
   %v75 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v63, <32 x i32> %v74, i32 -1) #2
   %v76 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v50, <16 x i32> %v45, i32 1) #2
   %v77 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v52, <16 x i32> %v49, i32 1) #2
   %v78 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v76, <16 x i32> %v47) #2
   %v79 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v77, <16 x i32> %v47) #2
-  %v80 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v78, <16 x i32> %v7) #2
-  %v81 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v79, <16 x i32> %v7) #2
-  %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v80, <16 x i32> %v71, <16 x i32> %v9) #2
-  %v83 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v81, <16 x i32> %v82, <16 x i32> %v9) #2
-  %v84 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v80, <16 x i32> %v8, <16 x i32> %v76) #2
-  %v85 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v81, <16 x i32> %v8, <16 x i32> %v77) #2
+  %v80 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v78, <16 x i32> %v7) #2
+  %v81 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v79, <16 x i32> %v7) #2
+  %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v80, <16 x i32> %v71, <16 x i32> %v9) #2
+  %v83 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v81, <16 x i32> %v82, <16 x i32> %v9) #2
+  %v84 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v80, <16 x i32> %v8, <16 x i32> %v76) #2
+  %v85 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v81, <16 x i32> %v8, <16 x i32> %v77) #2
   %v86 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v85, <16 x i32> %v84) #2
   %v87 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v75, <32 x i32> %v86, i32 -1) #2
   %v88 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v47, <16 x i32> %v46, i32 1) #2
   %v89 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v51, <16 x i32> %v47, i32 1) #2
   %v90 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v88, <16 x i32> %v47) #2
   %v91 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v89, <16 x i32> %v47) #2
-  %v92 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v90, <16 x i32> %v7) #2
-  %v93 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v91, <16 x i32> %v7) #2
-  %v94 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v92, <16 x i32> %v83, <16 x i32> %v9) #2
-  %v95 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v93, <16 x i32> %v94, <16 x i32> %v9) #2
-  %v96 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v92, <16 x i32> %v8, <16 x i32> %v88) #2
-  %v97 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v93, <16 x i32> %v8, <16 x i32> %v89) #2
+  %v92 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v90, <16 x i32> %v7) #2
+  %v93 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v91, <16 x i32> %v7) #2
+  %v94 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v92, <16 x i32> %v83, <16 x i32> %v9) #2
+  %v95 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v93, <16 x i32> %v94, <16 x i32> %v9) #2
+  %v96 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v92, <16 x i32> %v8, <16 x i32> %v88) #2
+  %v97 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v93, <16 x i32> %v8, <16 x i32> %v89) #2
   %v98 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v97, <16 x i32> %v96) #2
   %v99 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v87, <32 x i32> %v98, i32 -1) #2
   %v100 = tail call <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32> %v95, <16 x i32> %v4, i32 0) #2

diff --git a/llvm/test/CodeGen/Hexagon/v6-inlasm4.ll b/llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
index 4605b1c9b0a0..fada5c11732d 100644
--- a/llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
+++ b/llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
@@ -12,8 +12,9 @@ b0:
   store i32 %a0, i32* %v0, align 4
   store <16 x i32> %a1, <16 x i32>* %v1, align 64
   %v3 = load i32, i32* %v0, align 4
-  %v4 = load <16 x i32>, <16 x i32>* %v2, align 64
-  call void asm sideeffect "  $1 = vsetq($0);\0A", "r,q"(i32 %v3, <16 x i32> %v4) #1, !srcloc !0
+  %v4 = tail call <64 x i1> asm sideeffect "  $0 = vsetq($1);\0A", "=q,r"(i32 %v3) #1, !srcloc !0
+  %v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
+  store <16 x i32> %v5, <16 x i32>* %v2, align 64
   ret void
 }
 
@@ -23,7 +24,9 @@ b0:
   ret i32 0
 }
 
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind readnone }
 
 !0 = !{i32 222}

diff --git a/llvm/test/CodeGen/Hexagon/v6-spill1.ll b/llvm/test/CodeGen/Hexagon/v6-spill1.ll
index 788c27d42382..c2f37d44b288 100644
--- a/llvm/test/CodeGen/Hexagon/v6-spill1.ll
+++ b/llvm/test/CodeGen/Hexagon/v6-spill1.ll
@@ -44,10 +44,10 @@ b3:                                               ; preds = %b3, %b2
   %v28 = bitcast i8* %v27 to <16 x i32>*
   %v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
   %v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
-  %v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
-  %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
+  %v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
+  %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
   %v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
-  %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
+  %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
   %v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
   %v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
   %v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
@@ -56,22 +56,22 @@ b3:                                               ; preds = %b3, %b2
   %v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
   %v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
   %v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
-  %v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
-  %v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
-  %v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
-  %v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
-  %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
-  %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
-  %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
-  %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
+  %v43 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
+  %v44 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
+  %v45 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
+  %v46 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
+  %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
+  %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
+  %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
+  %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
   %v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
   %v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
   %v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
   %v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
-  %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
-  %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
-  %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
-  %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
+  %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
+  %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
+  %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
+  %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
   %v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
   %v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
   %v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
@@ -80,22 +80,22 @@ b3:                                               ; preds = %b3, %b2
   %v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
   %v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
   %v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
-  %v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
-  %v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
-  %v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
-  %v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
-  %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
-  %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
-  %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
-  %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
+  %v67 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
+  %v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
+  %v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
+  %v70 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
+  %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
+  %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
+  %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
+  %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
   %v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
   %v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
   %v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
   %v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
-  %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
-  %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
-  %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
-  %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
+  %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
+  %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
+  %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
+  %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
   %v83 = add nsw i32 %v15, 1
   %v84 = icmp eq i32 %v83, 5
   br i1 %v84, label %b4, label %b3
@@ -147,16 +147,16 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1

diff --git a/llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll b/llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
index be53694c40fa..a6eb739f8471 100644
--- a/llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
+++ b/llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
@@ -28,7 +28,7 @@ b3:                                               ; preds = %b2
 
 b4:                                               ; preds = %b4, %b3
   %v3 = phi <32 x i32> [ %v5, %b4 ], [ undef, %b3 ]
-  %v4 = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> %v3) #2
+  %v4 = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> %v3) #2
   %v5 = tail call <32 x i32> @llvm.hexagon.V6.vavguh.128B(<32 x i32> %v3, <32 x i32> %v2) #2
   br label %b4
 
@@ -43,7 +43,7 @@ declare void @f1(i8* nocapture readonly, i8* nocapture readonly, i8* nocapture,
 declare <32 x i32> @llvm.hexagon.V6.vd0.128B() #1
 
 ; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vavguh.128B(<32 x i32>, <32 x i32>) #1

diff --git a/llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll b/llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
index ed0fb0592541..c5cba8cf6155 100644
--- a/llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
+++ b/llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
@@ -32,76 +32,76 @@ b0:
   %v2 = call <16 x i32> @llvm.hexagon.V6.vd0()
   store <16 x i32> %v2, <16 x i32>* @g2, align 64
   %v3 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v4 = bitcast <16 x i32> %v3 to <512 x i1>
+  %v4 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v3, i32 -1)
   %v5 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v6 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v7 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %v4, <16 x i32> %v5, <16 x i32> %v6)
+  %v7 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %v4, <16 x i32> %v5, <16 x i32> %v6)
   store <16 x i32> %v7, <16 x i32>* @g2, align 64
   %v8 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v9 = bitcast <16 x i32> %v8 to <512 x i1>
+  %v9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v8, i32 -1)
   %v10 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v11 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v12 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %v9, <16 x i32> %v10, <16 x i32> %v11)
+  %v12 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %v9, <16 x i32> %v10, <16 x i32> %v11)
   store <16 x i32> %v12, <16 x i32>* @g2, align 64
   %v13 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v14 = bitcast <16 x i32> %v13 to <512 x i1>
+  %v14 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v13, i32 -1)
   %v15 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v16 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v17 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %v14, <16 x i32> %v15, <16 x i32> %v16)
+  %v17 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %v14, <16 x i32> %v15, <16 x i32> %v16)
   store <16 x i32> %v17, <16 x i32>* @g2, align 64
   %v18 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v19 = bitcast <16 x i32> %v18 to <512 x i1>
+  %v19 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v18, i32 -1)
   %v20 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v21 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v22 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %v19, <16 x i32> %v20, <16 x i32> %v21)
+  %v22 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %v19, <16 x i32> %v20, <16 x i32> %v21)
   store <16 x i32> %v22, <16 x i32>* @g2, align 64
   %v23 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v24 = bitcast <16 x i32> %v23 to <512 x i1>
+  %v24 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v23, i32 -1)
   %v25 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v26 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v27 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %v24, <16 x i32> %v25, <16 x i32> %v26)
+  %v27 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %v24, <16 x i32> %v25, <16 x i32> %v26)
   store <16 x i32> %v27, <16 x i32>* @g2, align 64
   %v28 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v29 = bitcast <16 x i32> %v28 to <512 x i1>
+  %v29 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v28, i32 -1)
   %v30 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v31 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v32 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %v29, <16 x i32> %v30, <16 x i32> %v31)
+  %v32 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %v29, <16 x i32> %v30, <16 x i32> %v31)
   store <16 x i32> %v32, <16 x i32>* @g2, align 64
   %v33 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v34 = bitcast <16 x i32> %v33 to <512 x i1>
+  %v34 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v33, i32 -1)
   %v35 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v36 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v37 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v34, <16 x i32> %v35, <16 x i32> %v36)
+  %v37 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v34, <16 x i32> %v35, <16 x i32> %v36)
   store <16 x i32> %v37, <16 x i32>* @g2, align 64
   %v38 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v39 = bitcast <16 x i32> %v38 to <512 x i1>
+  %v39 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v38, i32 -1)
   %v40 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v41 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v42 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %v39, <16 x i32> %v40, <16 x i32> %v41)
+  %v42 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %v39, <16 x i32> %v40, <16 x i32> %v41)
   store <16 x i32> %v42, <16 x i32>* @g2, align 64
   %v43 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v44 = bitcast <16 x i32> %v43 to <512 x i1>
+  %v44 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v43, i32 -1)
   %v45 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v46 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v47 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %v44, <16 x i32> %v45, <16 x i32> %v46)
+  %v47 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %v44, <16 x i32> %v45, <16 x i32> %v46)
   store <16 x i32> %v47, <16 x i32>* @g2, align 64
   %v48 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v49 = bitcast <16 x i32> %v48 to <512 x i1>
+  %v49 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v48, i32 -1)
   %v50 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v51 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v52 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %v49, <16 x i32> %v50, <16 x i32> %v51)
+  %v52 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %v49, <16 x i32> %v50, <16 x i32> %v51)
   store <16 x i32> %v52, <16 x i32>* @g2, align 64
   %v53 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v54 = bitcast <16 x i32> %v53 to <512 x i1>
+  %v54 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v53, i32 -1)
   %v55 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v56 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v57 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %v54, <16 x i32> %v55, <16 x i32> %v56)
+  %v57 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %v54, <16 x i32> %v55, <16 x i32> %v56)
   store <16 x i32> %v57, <16 x i32>* @g2, align 64
   %v58 = load <16 x i32>, <16 x i32>* @g3, align 64
-  %v59 = bitcast <16 x i32> %v58 to <512 x i1>
+  %v59 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v58, i32 -1)
   %v60 = load <16 x i32>, <16 x i32>* @g2, align 64
   %v61 = load <16 x i32>, <16 x i32>* @g1, align 64
-  %v62 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %v59, <16 x i32> %v60, <16 x i32> %v61)
+  %v62 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %v59, <16 x i32> %v60, <16 x i32> %v61)
   store <16 x i32> %v62, <16 x i32>* @g2, align 64
   ret i32 0
 }
@@ -110,40 +110,43 @@ b0:
 declare <16 x i32> @llvm.hexagon.V6.vd0() #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
 
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll b/llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
index e80a9212e06a..f03a0cc438f0 100644
--- a/llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
+++ b/llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
@@ -48,10 +48,10 @@ b3:                                               ; preds = %b3, %b2
   %v28 = bitcast i8* %v27 to <16 x i32>*
   %v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
   %v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
-  %v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
-  %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
+  %v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
+  %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
   %v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
-  %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
+  %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
   %v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
   %v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
   %v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
@@ -60,22 +60,22 @@ b3:                                               ; preds = %b3, %b2
   %v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
   %v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
   %v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
-  %v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
-  %v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
-  %v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
-  %v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
-  %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
-  %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
-  %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
-  %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
+  %v43 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
+  %v44 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
+  %v45 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
+  %v46 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
+  %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
+  %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
+  %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
+  %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
   %v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
   %v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
   %v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
   %v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
-  %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
-  %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
-  %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
-  %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
+  %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
+  %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
+  %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
+  %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
   %v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
   %v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
   %v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
@@ -84,22 +84,22 @@ b3:                                               ; preds = %b3, %b2
   %v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
   %v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
   %v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
-  %v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
-  %v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
-  %v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
-  %v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
-  %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
-  %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
-  %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
-  %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
+  %v67 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
+  %v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
+  %v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
+  %v70 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
+  %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
+  %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
+  %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
+  %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
   %v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
   %v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
   %v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
   %v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
-  %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
-  %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
-  %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
-  %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
+  %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
+  %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
+  %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
+  %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
   %v83 = add nsw i32 %v15, 1
   %v84 = icmp eq i32 %v83, 5
   br i1 %v84, label %b4, label %b3
@@ -151,16 +151,16 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1

diff --git a/llvm/test/CodeGen/Hexagon/v60-vsel1.ll b/llvm/test/CodeGen/Hexagon/v60-vsel1.ll
index 71d112cc7357..5da450b80459 100644
--- a/llvm/test/CodeGen/Hexagon/v60-vsel1.ll
+++ b/llvm/test/CodeGen/Hexagon/v60-vsel1.ll
@@ -14,8 +14,8 @@ entry:
   %add = add i32 %sub, %rem
   %2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
   %3 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
-  %4 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %add)
-  %5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %3, <512 x i1> %4, i32 12)
+  %4 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %add)
+  %5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %3, <64 x i1> %4, i32 12)
   %and4 = and i32 %add, 511
   %cmp = icmp eq i32 %and4, 0
   %sMaskR.0 = select i1 %cmp, <16 x i32> %2, <16 x i32> %5
@@ -23,8 +23,8 @@ entry:
   br i1 %cmp547, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:                                   ; preds = %entry
-  %6 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %boundary)
-  %7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %6, i32 16843009)
+  %6 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %boundary)
+  %7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %6, i32 16843009)
   %8 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %7)
   %9 = add i32 %rem, %xsize
   %10 = add i32 %9, -1
@@ -59,9 +59,9 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 }
 
 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
 declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1
 

diff --git a/llvm/test/CodeGen/Hexagon/v60-vsel2.ll b/llvm/test/CodeGen/Hexagon/v60-vsel2.ll
index 7dc06bb88e9c..8db3dd4ded0f 100644
--- a/llvm/test/CodeGen/Hexagon/v60-vsel2.ll
+++ b/llvm/test/CodeGen/Hexagon/v60-vsel2.ll
@@ -14,8 +14,8 @@ b0:
   %v4 = add i32 %v2, %v3
   %v5 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
   %v6 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
-  %v7 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v4)
-  %v8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v6, <512 x i1> %v7, i32 12)
+  %v7 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v4)
+  %v8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v6, <64 x i1> %v7, i32 12)
   %v9 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v8, <16 x i32> %v8)
   %v10 = and i32 %v4, 511
   %v11 = icmp eq i32 %v10, 0
@@ -31,8 +31,8 @@ b2:                                               ; preds = %b1, %b0
   br i1 %v14, label %b3, label %b6
 
 b3:                                               ; preds = %b2
-  %v15 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
-  %v16 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v15, i32 16843009)
+  %v15 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
+  %v16 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v15, i32 16843009)
   %v17 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v16)
   %v18 = add i32 %v3, %a1
   %v19 = add i32 %v18, -1
@@ -71,16 +71,16 @@ b6:                                               ; preds = %b5, %b2
 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1

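The v60Intrins.ll changes below repeat one rewrite throughout: every bitcast between
<16 x i32> and the old <512 x i1> type becomes an explicit conversion call. A minimal
round-trip sketch in the new form, assuming 64-byte HVX mode (the function name is
illustrative; the intrinsic signatures match the declarations in the test):

define <16 x i32> @pred_round_trip(<16 x i32> %v) {
  ; vector register -> predicate (formerly a plain bitcast to <512 x i1>)
  %q = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)
  ; operate on the predicate
  %p = call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %q)
  ; predicate -> vector register (formerly a bitcast back to <16 x i32>)
  %r = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %p, i32 -1)
  ret <16 x i32> %r
}

declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>)
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)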
diff  --git a/llvm/test/CodeGen/Hexagon/v60Intrins.ll b/llvm/test/CodeGen/Hexagon/v60Intrins.ll
index 45c122c1cb83..61087f573e6a 100644
--- a/llvm/test/CodeGen/Hexagon/v60Intrins.ll
+++ b/llvm/test/CodeGen/Hexagon/v60Intrins.ll
@@ -372,291 +372,291 @@ entry:
   %retval = alloca i32, align 4
   store i32 0, i32* %retval, align 4
   %0 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %1 = bitcast <16 x i32> %0 to <512 x i1>
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -1)
   %2 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
-  %3 = bitcast <16 x i32> %2 to <512 x i1>
-  %4 = call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %1, <512 x i1> %3)
-  %5 = bitcast <512 x i1> %4 to <16 x i32>
+  %3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %2, i32 -1)
+  %4 = call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %1, <64 x i1> %3)
+  %5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %4, i32 -1)
   store volatile <16 x i32> %5, <16 x i32>* @Q6VecPredResult, align 64
   %6 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %7 = bitcast <16 x i32> %6 to <512 x i1>
+  %7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %6, i32 -1)
   %8 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
-  %9 = bitcast <16 x i32> %8 to <512 x i1>
-  %10 = call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %7, <512 x i1> %9)
-  %11 = bitcast <512 x i1> %10 to <16 x i32>
+  %9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 -1)
+  %10 = call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %7, <64 x i1> %9)
+  %11 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %10, i32 -1)
   store volatile <16 x i32> %11, <16 x i32>* @Q6VecPredResult, align 64
   %12 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %13 = bitcast <16 x i32> %12 to <512 x i1>
-  %14 = call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %13)
-  %15 = bitcast <512 x i1> %14 to <16 x i32>
+  %13 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %12, i32 -1)
+  %14 = call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %13)
+  %15 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %14, i32 -1)
   store volatile <16 x i32> %15, <16 x i32>* @Q6VecPredResult, align 64
   %16 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %17 = bitcast <16 x i32> %16 to <512 x i1>
+  %17 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %16, i32 -1)
   %18 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
-  %19 = bitcast <16 x i32> %18 to <512 x i1>
-  %20 = call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %17, <512 x i1> %19)
-  %21 = bitcast <512 x i1> %20 to <16 x i32>
+  %19 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %18, i32 -1)
+  %20 = call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %17, <64 x i1> %19)
+  %21 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %20, i32 -1)
   store volatile <16 x i32> %21, <16 x i32>* @Q6VecPredResult, align 64
   %22 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %23 = bitcast <16 x i32> %22 to <512 x i1>
+  %23 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %22, i32 -1)
   %24 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
-  %25 = bitcast <16 x i32> %24 to <512 x i1>
-  %26 = call <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1> %23, <512 x i1> %25)
-  %27 = bitcast <512 x i1> %26 to <16 x i32>
+  %25 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %24, i32 -1)
+  %26 = call <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1> %23, <64 x i1> %25)
+  %27 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %26, i32 -1)
   store volatile <16 x i32> %27, <16 x i32>* @Q6VecPredResult, align 64
   %28 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
-  %29 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %28, i32 -1)
-  %30 = bitcast <512 x i1> %29 to <16 x i32>
+  %29 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %28, i32 -1)
+  %30 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %29, i32 -1)
   store volatile <16 x i32> %30, <16 x i32>* @Q6VecPredResult, align 64
   %31 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %32 = bitcast <16 x i32> %31 to <512 x i1>
+  %32 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %31, i32 -1)
   %33 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
-  %34 = call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %32, <16 x i32> %33, i32 -1)
-  %35 = bitcast <512 x i1> %34 to <16 x i32>
+  %34 = call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %32, <16 x i32> %33, i32 -1)
+  %35 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %34, i32 -1)
   store volatile <16 x i32> %35, <16 x i32>* @Q6VecPredResult, align 64
   %36 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %37 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %38 = call <512 x i1> @llvm.hexagon.V6.veqb(<16 x i32> %36, <16 x i32> %37)
-  %39 = bitcast <512 x i1> %38 to <16 x i32>
+  %38 = call <64 x i1> @llvm.hexagon.V6.veqb(<16 x i32> %36, <16 x i32> %37)
+  %39 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %38, i32 -1)
   store volatile <16 x i32> %39, <16 x i32>* @Q6VecPredResult, align 64
   %40 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %41 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %42 = call <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %40, <16 x i32> %41)
-  %43 = bitcast <512 x i1> %42 to <16 x i32>
+  %42 = call <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %40, <16 x i32> %41)
+  %43 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %42, i32 -1)
   store volatile <16 x i32> %43, <16 x i32>* @Q6VecPredResult, align 64
   %44 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %45 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %46 = call <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %44, <16 x i32> %45)
-  %47 = bitcast <512 x i1> %46 to <16 x i32>
+  %46 = call <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %44, <16 x i32> %45)
+  %47 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %46, i32 -1)
   store volatile <16 x i32> %47, <16 x i32>* @Q6VecPredResult, align 64
   %48 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %49 = bitcast <16 x i32> %48 to <512 x i1>
+  %49 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %48, i32 -1)
   %50 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %51 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %52 = call <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1> %49, <16 x i32> %50, <16 x i32> %51)
-  %53 = bitcast <512 x i1> %52 to <16 x i32>
+  %52 = call <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1> %49, <16 x i32> %50, <16 x i32> %51)
+  %53 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %52, i32 -1)
   store volatile <16 x i32> %53, <16 x i32>* @Q6VecPredResult, align 64
   %54 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %55 = bitcast <16 x i32> %54 to <512 x i1>
+  %55 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %54, i32 -1)
   %56 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %57 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %58 = call <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1> %55, <16 x i32> %56, <16 x i32> %57)
-  %59 = bitcast <512 x i1> %58 to <16 x i32>
+  %58 = call <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1> %55, <16 x i32> %56, <16 x i32> %57)
+  %59 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %58, i32 -1)
   store volatile <16 x i32> %59, <16 x i32>* @Q6VecPredResult, align 64
   %60 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %61 = bitcast <16 x i32> %60 to <512 x i1>
+  %61 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %60, i32 -1)
   %62 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %63 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %64 = call <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1> %61, <16 x i32> %62, <16 x i32> %63)
-  %65 = bitcast <512 x i1> %64 to <16 x i32>
+  %64 = call <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1> %61, <16 x i32> %62, <16 x i32> %63)
+  %65 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %64, i32 -1)
   store volatile <16 x i32> %65, <16 x i32>* @Q6VecPredResult, align 64
   %66 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %67 = bitcast <16 x i32> %66 to <512 x i1>
+  %67 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %66, i32 -1)
   %68 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %69 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %70 = call <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1> %67, <16 x i32> %68, <16 x i32> %69)
-  %71 = bitcast <512 x i1> %70 to <16 x i32>
+  %70 = call <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1> %67, <16 x i32> %68, <16 x i32> %69)
+  %71 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %70, i32 -1)
   store volatile <16 x i32> %71, <16 x i32>* @Q6VecPredResult, align 64
   %72 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %73 = bitcast <16 x i32> %72 to <512 x i1>
+  %73 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %72, i32 -1)
   %74 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %75 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %76 = call <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1> %73, <16 x i32> %74, <16 x i32> %75)
-  %77 = bitcast <512 x i1> %76 to <16 x i32>
+  %76 = call <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1> %73, <16 x i32> %74, <16 x i32> %75)
+  %77 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %76, i32 -1)
   store volatile <16 x i32> %77, <16 x i32>* @Q6VecPredResult, align 64
   %78 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %79 = bitcast <16 x i32> %78 to <512 x i1>
+  %79 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %78, i32 -1)
   %80 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %81 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %82 = call <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1> %79, <16 x i32> %80, <16 x i32> %81)
-  %83 = bitcast <512 x i1> %82 to <16 x i32>
+  %82 = call <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1> %79, <16 x i32> %80, <16 x i32> %81)
+  %83 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %82, i32 -1)
   store volatile <16 x i32> %83, <16 x i32>* @Q6VecPredResult, align 64
   %84 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %85 = bitcast <16 x i32> %84 to <512 x i1>
+  %85 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %84, i32 -1)
   %86 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %87 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %88 = call <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1> %85, <16 x i32> %86, <16 x i32> %87)
-  %89 = bitcast <512 x i1> %88 to <16 x i32>
+  %88 = call <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1> %85, <16 x i32> %86, <16 x i32> %87)
+  %89 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %88, i32 -1)
   store volatile <16 x i32> %89, <16 x i32>* @Q6VecPredResult, align 64
   %90 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %91 = bitcast <16 x i32> %90 to <512 x i1>
+  %91 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %90, i32 -1)
   %92 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %93 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %94 = call <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1> %91, <16 x i32> %92, <16 x i32> %93)
-  %95 = bitcast <512 x i1> %94 to <16 x i32>
+  %94 = call <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1> %91, <16 x i32> %92, <16 x i32> %93)
+  %95 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %94, i32 -1)
   store volatile <16 x i32> %95, <16 x i32>* @Q6VecPredResult, align 64
   %96 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %97 = bitcast <16 x i32> %96 to <512 x i1>
+  %97 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %96, i32 -1)
   %98 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %99 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %100 = call <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1> %97, <16 x i32> %98, <16 x i32> %99)
-  %101 = bitcast <512 x i1> %100 to <16 x i32>
+  %100 = call <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1> %97, <16 x i32> %98, <16 x i32> %99)
+  %101 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %100, i32 -1)
   store volatile <16 x i32> %101, <16 x i32>* @Q6VecPredResult, align 64
   %102 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %103 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %104 = call <512 x i1> @llvm.hexagon.V6.vgtb(<16 x i32> %102, <16 x i32> %103)
-  %105 = bitcast <512 x i1> %104 to <16 x i32>
+  %104 = call <64 x i1> @llvm.hexagon.V6.vgtb(<16 x i32> %102, <16 x i32> %103)
+  %105 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %104, i32 -1)
   store volatile <16 x i32> %105, <16 x i32>* @Q6VecPredResult, align 64
   %106 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %107 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %108 = call <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %106, <16 x i32> %107)
-  %109 = bitcast <512 x i1> %108 to <16 x i32>
+  %108 = call <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %106, <16 x i32> %107)
+  %109 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %108, i32 -1)
   store volatile <16 x i32> %109, <16 x i32>* @Q6VecPredResult, align 64
   %110 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %111 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %112 = call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %110, <16 x i32> %111)
-  %113 = bitcast <512 x i1> %112 to <16 x i32>
+  %112 = call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %110, <16 x i32> %111)
+  %113 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %112, i32 -1)
   store volatile <16 x i32> %113, <16 x i32>* @Q6VecPredResult, align 64
   %114 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %115 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %116 = call <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %114, <16 x i32> %115)
-  %117 = bitcast <512 x i1> %116 to <16 x i32>
+  %116 = call <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %114, <16 x i32> %115)
+  %117 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %116, i32 -1)
   store volatile <16 x i32> %117, <16 x i32>* @Q6VecPredResult, align 64
   %118 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %119 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %120 = call <512 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32> %118, <16 x i32> %119)
-  %121 = bitcast <512 x i1> %120 to <16 x i32>
+  %120 = call <64 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32> %118, <16 x i32> %119)
+  %121 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %120, i32 -1)
   store volatile <16 x i32> %121, <16 x i32>* @Q6VecPredResult, align 64
   %122 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %123 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %124 = call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %122, <16 x i32> %123)
-  %125 = bitcast <512 x i1> %124 to <16 x i32>
+  %124 = call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %122, <16 x i32> %123)
+  %125 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %124, i32 -1)
   store volatile <16 x i32> %125, <16 x i32>* @Q6VecPredResult, align 64
   %126 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %127 = bitcast <16 x i32> %126 to <512 x i1>
+  %127 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %126, i32 -1)
   %128 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %129 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %130 = call <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1> %127, <16 x i32> %128, <16 x i32> %129)
-  %131 = bitcast <512 x i1> %130 to <16 x i32>
+  %130 = call <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1> %127, <16 x i32> %128, <16 x i32> %129)
+  %131 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %130, i32 -1)
   store volatile <16 x i32> %131, <16 x i32>* @Q6VecPredResult, align 64
   %132 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %133 = bitcast <16 x i32> %132 to <512 x i1>
+  %133 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %132, i32 -1)
   %134 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %135 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %136 = call <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1> %133, <16 x i32> %134, <16 x i32> %135)
-  %137 = bitcast <512 x i1> %136 to <16 x i32>
+  %136 = call <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1> %133, <16 x i32> %134, <16 x i32> %135)
+  %137 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %136, i32 -1)
   store volatile <16 x i32> %137, <16 x i32>* @Q6VecPredResult, align 64
   %138 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %139 = bitcast <16 x i32> %138 to <512 x i1>
+  %139 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %138, i32 -1)
   %140 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %141 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %142 = call <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1> %139, <16 x i32> %140, <16 x i32> %141)
-  %143 = bitcast <512 x i1> %142 to <16 x i32>
+  %142 = call <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1> %139, <16 x i32> %140, <16 x i32> %141)
+  %143 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %142, i32 -1)
   store volatile <16 x i32> %143, <16 x i32>* @Q6VecPredResult, align 64
   %144 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %145 = bitcast <16 x i32> %144 to <512 x i1>
+  %145 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %144, i32 -1)
   %146 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %147 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %148 = call <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1> %145, <16 x i32> %146, <16 x i32> %147)
-  %149 = bitcast <512 x i1> %148 to <16 x i32>
+  %148 = call <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1> %145, <16 x i32> %146, <16 x i32> %147)
+  %149 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %148, i32 -1)
   store volatile <16 x i32> %149, <16 x i32>* @Q6VecPredResult, align 64
   %150 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %151 = bitcast <16 x i32> %150 to <512 x i1>
+  %151 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %150, i32 -1)
   %152 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %153 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %154 = call <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1> %151, <16 x i32> %152, <16 x i32> %153)
-  %155 = bitcast <512 x i1> %154 to <16 x i32>
+  %154 = call <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1> %151, <16 x i32> %152, <16 x i32> %153)
+  %155 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %154, i32 -1)
   store volatile <16 x i32> %155, <16 x i32>* @Q6VecPredResult, align 64
   %156 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %157 = bitcast <16 x i32> %156 to <512 x i1>
+  %157 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %156, i32 -1)
   %158 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %159 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %160 = call <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1> %157, <16 x i32> %158, <16 x i32> %159)
-  %161 = bitcast <512 x i1> %160 to <16 x i32>
+  %160 = call <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1> %157, <16 x i32> %158, <16 x i32> %159)
+  %161 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %160, i32 -1)
   store volatile <16 x i32> %161, <16 x i32>* @Q6VecPredResult, align 64
   %162 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %163 = bitcast <16 x i32> %162 to <512 x i1>
+  %163 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %162, i32 -1)
   %164 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %165 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %166 = call <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1> %163, <16 x i32> %164, <16 x i32> %165)
-  %167 = bitcast <512 x i1> %166 to <16 x i32>
+  %166 = call <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1> %163, <16 x i32> %164, <16 x i32> %165)
+  %167 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %166, i32 -1)
   store volatile <16 x i32> %167, <16 x i32>* @Q6VecPredResult, align 64
   %168 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %169 = bitcast <16 x i32> %168 to <512 x i1>
+  %169 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %168, i32 -1)
   %170 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %171 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %172 = call <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1> %169, <16 x i32> %170, <16 x i32> %171)
-  %173 = bitcast <512 x i1> %172 to <16 x i32>
+  %172 = call <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1> %169, <16 x i32> %170, <16 x i32> %171)
+  %173 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %172, i32 -1)
   store volatile <16 x i32> %173, <16 x i32>* @Q6VecPredResult, align 64
   %174 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %175 = bitcast <16 x i32> %174 to <512 x i1>
+  %175 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %174, i32 -1)
   %176 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %177 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %178 = call <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1> %175, <16 x i32> %176, <16 x i32> %177)
-  %179 = bitcast <512 x i1> %178 to <16 x i32>
+  %178 = call <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1> %175, <16 x i32> %176, <16 x i32> %177)
+  %179 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %178, i32 -1)
   store volatile <16 x i32> %179, <16 x i32>* @Q6VecPredResult, align 64
   %180 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %181 = bitcast <16 x i32> %180 to <512 x i1>
+  %181 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %180, i32 -1)
   %182 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %183 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %184 = call <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1> %181, <16 x i32> %182, <16 x i32> %183)
-  %185 = bitcast <512 x i1> %184 to <16 x i32>
+  %184 = call <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1> %181, <16 x i32> %182, <16 x i32> %183)
+  %185 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %184, i32 -1)
   store volatile <16 x i32> %185, <16 x i32>* @Q6VecPredResult, align 64
   %186 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %187 = bitcast <16 x i32> %186 to <512 x i1>
+  %187 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %186, i32 -1)
   %188 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %189 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %190 = call <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1> %187, <16 x i32> %188, <16 x i32> %189)
-  %191 = bitcast <512 x i1> %190 to <16 x i32>
+  %190 = call <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1> %187, <16 x i32> %188, <16 x i32> %189)
+  %191 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %190, i32 -1)
   store volatile <16 x i32> %191, <16 x i32>* @Q6VecPredResult, align 64
   %192 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %193 = bitcast <16 x i32> %192 to <512 x i1>
+  %193 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %192, i32 -1)
   %194 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %195 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %196 = call <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1> %193, <16 x i32> %194, <16 x i32> %195)
-  %197 = bitcast <512 x i1> %196 to <16 x i32>
+  %196 = call <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1> %193, <16 x i32> %194, <16 x i32> %195)
+  %197 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %196, i32 -1)
   store volatile <16 x i32> %197, <16 x i32>* @Q6VecPredResult, align 64
   %198 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %199 = bitcast <16 x i32> %198 to <512 x i1>
+  %199 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %198, i32 -1)
   %200 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %201 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %202 = call <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1> %199, <16 x i32> %200, <16 x i32> %201)
-  %203 = bitcast <512 x i1> %202 to <16 x i32>
+  %202 = call <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1> %199, <16 x i32> %200, <16 x i32> %201)
+  %203 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %202, i32 -1)
   store volatile <16 x i32> %203, <16 x i32>* @Q6VecPredResult, align 64
   %204 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %205 = bitcast <16 x i32> %204 to <512 x i1>
+  %205 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %204, i32 -1)
   %206 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %207 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %208 = call <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1> %205, <16 x i32> %206, <16 x i32> %207)
-  %209 = bitcast <512 x i1> %208 to <16 x i32>
+  %208 = call <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1> %205, <16 x i32> %206, <16 x i32> %207)
+  %209 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %208, i32 -1)
   store volatile <16 x i32> %209, <16 x i32>* @Q6VecPredResult, align 64
   %210 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %211 = bitcast <16 x i32> %210 to <512 x i1>
+  %211 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %210, i32 -1)
   %212 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %213 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %214 = call <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1> %211, <16 x i32> %212, <16 x i32> %213)
-  %215 = bitcast <512 x i1> %214 to <16 x i32>
+  %214 = call <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1> %211, <16 x i32> %212, <16 x i32> %213)
+  %215 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %214, i32 -1)
   store volatile <16 x i32> %215, <16 x i32>* @Q6VecPredResult, align 64
   %216 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %217 = bitcast <16 x i32> %216 to <512 x i1>
+  %217 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %216, i32 -1)
   %218 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %219 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %220 = call <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1> %217, <16 x i32> %218, <16 x i32> %219)
-  %221 = bitcast <512 x i1> %220 to <16 x i32>
+  %220 = call <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1> %217, <16 x i32> %218, <16 x i32> %219)
+  %221 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %220, i32 -1)
   store volatile <16 x i32> %221, <16 x i32>* @Q6VecPredResult, align 64
   %222 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %223 = bitcast <16 x i32> %222 to <512 x i1>
+  %223 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %222, i32 -1)
   %224 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %225 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %226 = call <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1> %223, <16 x i32> %224, <16 x i32> %225)
-  %227 = bitcast <512 x i1> %226 to <16 x i32>
+  %226 = call <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1> %223, <16 x i32> %224, <16 x i32> %225)
+  %227 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %226, i32 -1)
   store volatile <16 x i32> %227, <16 x i32>* @Q6VecPredResult, align 64
   %228 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %229 = bitcast <16 x i32> %228 to <512 x i1>
+  %229 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %228, i32 -1)
   %230 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %231 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %232 = call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %229, <16 x i32> %230, <16 x i32> %231)
-  %233 = bitcast <512 x i1> %232 to <16 x i32>
+  %232 = call <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1> %229, <16 x i32> %230, <16 x i32> %231)
+  %233 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %232, i32 -1)
   store volatile <16 x i32> %233, <16 x i32>* @Q6VecPredResult, align 64
-  %234 = call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 1)
-  %235 = bitcast <512 x i1> %234 to <16 x i32>
+  %234 = call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 1)
+  %235 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %234, i32 -1)
   store volatile <16 x i32> %235, <16 x i32>* @Q6VecPredResult, align 64
   %236 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %237 = bitcast <16 x i32> %236 to <512 x i1>
+  %237 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %236, i32 -1)
   %238 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
-  %239 = bitcast <16 x i32> %238 to <512 x i1>
-  %240 = call <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1> %237, <512 x i1> %239)
-  %241 = bitcast <512 x i1> %240 to <16 x i32>
+  %239 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %238, i32 -1)
+  %240 = call <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1> %237, <64 x i1> %239)
+  %241 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %240, i32 -1)
   store volatile <16 x i32> %241, <16 x i32>* @Q6VecPredResult, align 64
   %242 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %243 = call <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32> %242)
@@ -676,8 +676,8 @@ entry:
   %253 = call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %251, <16 x i32> %252, i32 -1)
   store volatile <16 x i32> %253, <16 x i32>* @VectorResult, align 64
   %254 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %255 = bitcast <16 x i32> %254 to <512 x i1>
-  %256 = call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %255, i32 -1)
+  %255 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %254, i32 -1)
+  %256 = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %255, i32 -1)
   store volatile <16 x i32> %256, <16 x i32>* @VectorResult, align 64
   %257 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %258 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@@ -685,8 +685,8 @@ entry:
   store volatile <16 x i32> %259, <16 x i32>* @VectorResult, align 64
   %260 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %261 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %262 = bitcast <16 x i32> %261 to <512 x i1>
-  %263 = call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %260, <512 x i1> %262, i32 -1)
+  %262 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %261, i32 -1)
+  %263 = call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %260, <64 x i1> %262, i32 -1)
   store volatile <16 x i32> %263, <16 x i32>* @VectorResult, align 64
   %264 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %265 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@@ -701,10 +701,10 @@ entry:
   %272 = call <16 x i32> @llvm.hexagon.V6.vlalignb(<16 x i32> %270, <16 x i32> %271, i32 -1)
   store volatile <16 x i32> %272, <16 x i32>* @VectorResult, align 64
   %273 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %274 = bitcast <16 x i32> %273 to <512 x i1>
+  %274 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %273, i32 -1)
   %275 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %276 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %277 = call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %274, <16 x i32> %275, <16 x i32> %276)
+  %277 = call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %274, <16 x i32> %275, <16 x i32> %276)
   store volatile <16 x i32> %277, <16 x i32>* @VectorResult, align 64
   %278 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %279 = call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %278)
@@ -729,28 +729,28 @@ entry:
   %292 = call <16 x i32> @llvm.hexagon.V6.vd0()
   store volatile <16 x i32> %292, <16 x i32>* @VectorResult, align 64
   %293 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %294 = bitcast <16 x i32> %293 to <512 x i1>
+  %294 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %293, i32 -1)
   %295 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %296 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %297 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %294, <16 x i32> %295, <16 x i32> %296)
+  %297 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %294, <16 x i32> %295, <16 x i32> %296)
   store volatile <16 x i32> %297, <16 x i32>* @VectorResult, align 64
   %298 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %299 = bitcast <16 x i32> %298 to <512 x i1>
+  %299 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %298, i32 -1)
   %300 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %301 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %302 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %299, <16 x i32> %300, <16 x i32> %301)
+  %302 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %299, <16 x i32> %300, <16 x i32> %301)
   store volatile <16 x i32> %302, <16 x i32>* @VectorResult, align 64
   %303 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %304 = bitcast <16 x i32> %303 to <512 x i1>
+  %304 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %303, i32 -1)
   %305 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %306 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %307 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %304, <16 x i32> %305, <16 x i32> %306)
+  %307 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %304, <16 x i32> %305, <16 x i32> %306)
   store volatile <16 x i32> %307, <16 x i32>* @VectorResult, align 64
   %308 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %309 = bitcast <16 x i32> %308 to <512 x i1>
+  %309 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %308, i32 -1)
   %310 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %311 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %312 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %309, <16 x i32> %310, <16 x i32> %311)
+  %312 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %309, <16 x i32> %310, <16 x i32> %311)
   store volatile <16 x i32> %312, <16 x i32>* @VectorResult, align 64
   %313 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %314 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@@ -812,28 +812,28 @@ entry:
   %356 = call <16 x i32> @llvm.hexagon.V6.vsubb(<16 x i32> %354, <16 x i32> %355)
   store volatile <16 x i32> %356, <16 x i32>* @VectorResult, align 64
   %357 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %358 = bitcast <16 x i32> %357 to <512 x i1>
+  %358 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %357, i32 -1)
   %359 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %360 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %361 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %358, <16 x i32> %359, <16 x i32> %360)
+  %361 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %358, <16 x i32> %359, <16 x i32> %360)
   store volatile <16 x i32> %361, <16 x i32>* @VectorResult, align 64
   %362 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %363 = bitcast <16 x i32> %362 to <512 x i1>
+  %363 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %362, i32 -1)
   %364 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %365 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %366 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %363, <16 x i32> %364, <16 x i32> %365)
+  %366 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %363, <16 x i32> %364, <16 x i32> %365)
   store volatile <16 x i32> %366, <16 x i32>* @VectorResult, align 64
   %367 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %368 = bitcast <16 x i32> %367 to <512 x i1>
+  %368 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %367, i32 -1)
   %369 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %370 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %371 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %368, <16 x i32> %369, <16 x i32> %370)
+  %371 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %368, <16 x i32> %369, <16 x i32> %370)
   store volatile <16 x i32> %371, <16 x i32>* @VectorResult, align 64
   %372 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %373 = bitcast <16 x i32> %372 to <512 x i1>
+  %373 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %372, i32 -1)
   %374 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %375 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %376 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %373, <16 x i32> %374, <16 x i32> %375)
+  %376 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %373, <16 x i32> %374, <16 x i32> %375)
   store volatile <16 x i32> %376, <16 x i32>* @VectorResult, align 64
   %377 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %378 = call <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32> %377)
@@ -1105,28 +1105,28 @@ entry:
   %574 = call <16 x i32> @llvm.hexagon.V6.vrmpyubv.acc(<16 x i32> %571, <16 x i32> %572, <16 x i32> %573)
   store volatile <16 x i32> %574, <16 x i32>* @VectorResult, align 64
   %575 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %576 = bitcast <16 x i32> %575 to <512 x i1>
+  %576 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %575, i32 -1)
   %577 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %578 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %579 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %576, <16 x i32> %577, <16 x i32> %578)
+  %579 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %576, <16 x i32> %577, <16 x i32> %578)
   store volatile <16 x i32> %579, <16 x i32>* @VectorResult, align 64
   %580 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %581 = bitcast <16 x i32> %580 to <512 x i1>
+  %581 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %580, i32 -1)
   %582 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %583 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %584 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %581, <16 x i32> %582, <16 x i32> %583)
+  %584 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %581, <16 x i32> %582, <16 x i32> %583)
   store volatile <16 x i32> %584, <16 x i32>* @VectorResult, align 64
   %585 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %586 = bitcast <16 x i32> %585 to <512 x i1>
+  %586 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %585, i32 -1)
   %587 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %588 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %589 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %586, <16 x i32> %587, <16 x i32> %588)
+  %589 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %586, <16 x i32> %587, <16 x i32> %588)
   store volatile <16 x i32> %589, <16 x i32>* @VectorResult, align 64
   %590 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %591 = bitcast <16 x i32> %590 to <512 x i1>
+  %591 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %590, i32 -1)
   %592 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %593 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %594 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %591, <16 x i32> %592, <16 x i32> %593)
+  %594 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %591, <16 x i32> %592, <16 x i32> %593)
   store volatile <16 x i32> %594, <16 x i32>* @VectorResult, align 64
   %595 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %596 = call <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32> %595)
@@ -1359,10 +1359,10 @@ entry:
   %764 = call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> %762, <16 x i32> %763, i32 1)
   store volatile <32 x i32> %764, <32 x i32>* @VectorPairResult, align 128
   %765 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %766 = bitcast <16 x i32> %765 to <512 x i1>
+  %766 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %765, i32 -1)
   %767 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
   %768 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
-  %769 = call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> %766, <16 x i32> %767, <16 x i32> %768)
+  %769 = call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> %766, <16 x i32> %767, <16 x i32> %768)
   store volatile <32 x i32> %769, <32 x i32>* @VectorPairResult, align 128
   %770 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
   %771 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
@@ -1664,139 +1664,139 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqb(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqb(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtb(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtb(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32>) #1
@@ -1814,13 +1814,13 @@ declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #1
 declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vdelta(<16 x i32>, <16 x i32>) #1
@@ -1832,7 +1832,7 @@ declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
 declare <16 x i32> @llvm.hexagon.V6.vlalignb(<16 x i32>, <16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
@@ -1856,16 +1856,16 @@ declare <16 x i32> @llvm.hexagon.V6.vxor(<16 x i32>, <16 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.vd0() #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vaddb(<16 x i32>, <16 x i32>) #1
@@ -1913,16 +1913,16 @@ declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.vsubb(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #1
@@ -2138,16 +2138,16 @@ declare <16 x i32> @llvm.hexagon.V6.vrmpyub.acc(<16 x i32>, <16 x i32>, i32) #1
 declare <16 x i32> @llvm.hexagon.V6.vrmpyubv.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32>) #1
@@ -2318,7 +2318,7 @@ declare <32 x i32> @llvm.hexagon.V6.vdealvdd(<16 x i32>, <16 x i32>, i32) #1
 declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vaddb.dv(<32 x i32>, <32 x i32>) #1

diff --git a/llvm/test/CodeGen/Hexagon/v60_sort16.ll b/llvm/test/CodeGen/Hexagon/v60_sort16.ll
index 6c4626a2390e..f54768ed3f20 100644
--- a/llvm/test/CodeGen/Hexagon/v60_sort16.ll
+++ b/llvm/test/CodeGen/Hexagon/v60_sort16.ll
@@ -60,10 +60,10 @@ b1:                                               ; preds = %b3, %b0
 
 b2:                                               ; preds = %b1
   %v34 = load <16 x i32>, <16 x i32>* %v11, align 64
-  %v35 = bitcast <16 x i32> %v34 to <512 x i1>
+  %v35 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v34, i32 -1)
   %v36 = load <16 x i32>, <16 x i32>* %v14, align 64
   %v37 = load <16 x i32>, <16 x i32>* %v15, align 64
-  %v38 = call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> %v35, <16 x i32> %v36, <16 x i32> %v37)
+  %v38 = call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> %v35, <16 x i32> %v36, <16 x i32> %v37)
   store <32 x i32> %v38, <32 x i32>* %v13, align 128
   %v39 = load <32 x i32>, <32 x i32>* %v13, align 128
   %v40 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v39)
@@ -89,7 +89,7 @@ b4:                                               ; preds = %b1
 }
 
 ; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
@@ -100,5 +100,8 @@ declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vdealvdd(<16 x i32>, <16 x i32>, i32) #1
 
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/v60small.ll b/llvm/test/CodeGen/Hexagon/v60small.ll
index 746af018b06d..171ab28b0317 100644
--- a/llvm/test/CodeGen/Hexagon/v60small.ll
+++ b/llvm/test/CodeGen/Hexagon/v60small.ll
@@ -24,28 +24,34 @@ entry:
   %retval = alloca i32, align 4
   store i32 0, i32* %retval, align 4
   %0 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %1 = bitcast <16 x i32> %0 to <512 x i1>
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -1)
   %2 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
-  %3 = bitcast <16 x i32> %2 to <512 x i1>
-  %4 = call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %1, <512 x i1> %3)
-  %5 = bitcast <512 x i1> %4 to <16 x i32>
+  %3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %2, i32 -1)
+  %4 = call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %1, <64 x i1> %3)
+  %5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %4, i32 -1)
   store volatile <16 x i32> %5, <16 x i32>* @Q6VecPredResult, align 64
   %6 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
-  %7 = bitcast <16 x i32> %6 to <512 x i1>
+  %7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %6, i32 -1)
   %8 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
-  %9 = bitcast <16 x i32> %8 to <512 x i1>
-  %10 = call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %7, <512 x i1> %9)
-  %11 = bitcast <512 x i1> %10 to <16 x i32>
+  %9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 -1)
+  %10 = call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %7, <64 x i1> %9)
+  %11 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %10, i32 -1)
   store volatile <16 x i32> %11, <16 x i32>* @Q6VecPredResult, align 64
   ret i32 0
 
 }
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
 
 attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/v62-inlasm4.ll b/llvm/test/CodeGen/Hexagon/v62-inlasm4.ll
index 8831eab0a7d0..1ba41011f124 100644
--- a/llvm/test/CodeGen/Hexagon/v62-inlasm4.ll
+++ b/llvm/test/CodeGen/Hexagon/v62-inlasm4.ll
@@ -12,8 +12,9 @@ b0:
   store i32 %a0, i32* %v0, align 4
   store <16 x i32> %a1, <16 x i32>* %v1, align 64
   %v3 = load i32, i32* %v0, align 4
-  %v4 = load <16 x i32>, <16 x i32>* %v2, align 64
-  call void asm sideeffect "  $1 = vsetq2($0);\0A", "r,q"(i32 %v3, <16 x i32> %v4) #1
+  %v4 = tail call <64 x i1> asm sideeffect "  $0 = vsetq2($1);\0A", "=q,r"(i32 %v3) #1
+  %v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
+  store <16 x i32> %v5, <16 x i32>* %v2, align 64
   ret void
 }
 
@@ -23,5 +24,7 @@ b0:
   ret i32 0
 }
 
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+
 attributes #0 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length64b" }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll b/llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll
index ec5cfe0e68fc..c5f989a88f53 100644
--- a/llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll
+++ b/llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll
@@ -17,66 +17,66 @@ b1:                                               ; preds = %b1, %b0
   %v5 = phi i32 [ %v77, %b1 ], [ 0, %b0 ]
   %v6 = phi <32 x i32>* [ undef, %b1 ], [ undef, %b0 ]
   %v7 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> undef, <32 x i32> undef)
-  %v8 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v7, <32 x i32> zeroinitializer)
-  %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v8, <32 x i32> undef, <32 x i32> %v0)
+  %v8 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v7, <32 x i32> zeroinitializer)
+  %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v8, <32 x i32> undef, <32 x i32> %v0)
   %v10 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 3)
   %v11 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
   %v12 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v10, <32 x i32> undef)
-  %v13 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
-  %v14 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> zeroinitializer)
-  %v15 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v13, <32 x i32> %v9, <32 x i32> %v0)
-  %v16 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v14, <32 x i32> %v15, <32 x i32> %v0)
-  %v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v16, <32 x i32> %v0)
-  %v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v17, <32 x i32> %v0)
-  %v19 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
-  %v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v19, <32 x i32> %v18, <32 x i32> %v0)
-  %v21 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
-  %v22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> undef)
-  %v23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v21, <32 x i32> undef, <32 x i32> undef)
+  %v13 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
+  %v14 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> zeroinitializer)
+  %v15 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v13, <32 x i32> %v9, <32 x i32> %v0)
+  %v16 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v14, <32 x i32> %v15, <32 x i32> %v0)
+  %v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v16, <32 x i32> %v0)
+  %v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v17, <32 x i32> %v0)
+  %v19 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+  %v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v19, <32 x i32> %v18, <32 x i32> %v0)
+  %v21 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+  %v22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> undef)
+  %v23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v21, <32 x i32> undef, <32 x i32> undef)
   %v24 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v23, <32 x i32> %v22)
   %v25 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> zeroinitializer, <64 x i32> %v24, i32 16843009)
-  %v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v20, <32 x i32> %v0)
-  %v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v26, <32 x i32> %v0)
-  %v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v27, <32 x i32> %v0)
-  %v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v21, <32 x i32> %v28, <32 x i32> %v0)
-  %v30 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
-  %v31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> zeroinitializer)
+  %v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v20, <32 x i32> %v0)
+  %v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v26, <32 x i32> %v0)
+  %v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v27, <32 x i32> %v0)
+  %v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v21, <32 x i32> %v28, <32 x i32> %v0)
+  %v30 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+  %v31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> zeroinitializer)
   %v32 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v31, <32 x i32> undef)
   %v33 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v25, <64 x i32> %v32, i32 16843009)
   %v34 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v33, <64 x i32> undef, i32 16843009)
-  %v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v29, <32 x i32> %v0)
-  %v36 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v35, <32 x i32> %v0)
-  %v37 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v36, <32 x i32> %v0)
-  %v38 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v30, <32 x i32> %v37, <32 x i32> %v0)
+  %v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v29, <32 x i32> %v0)
+  %v36 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v35, <32 x i32> %v0)
+  %v37 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v36, <32 x i32> %v0)
+  %v38 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v30, <32 x i32> %v37, <32 x i32> %v0)
   %v39 = load <32 x i32>, <32 x i32>* null, align 128, !tbaa !0
-  %v40 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
-  %v41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v40, <32 x i32> undef, <32 x i32> %v39)
+  %v40 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+  %v41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v40, <32 x i32> undef, <32 x i32> %v39)
   %v42 = tail call <64 x i32> @llvm.hexagon.V6.vmpybus.acc.128B(<64 x i32> %v34, <32 x i32> %v41, i32 16843009)
-  %v43 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v40, <32 x i32> %v38, <32 x i32> %v0)
+  %v43 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v40, <32 x i32> %v38, <32 x i32> %v0)
   %v44 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> %v39, <32 x i32> undef, i32 1)
   %v45 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> %v39, i32 1)
   %v46 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> %v39, i32 2)
   %v47 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v44, <32 x i32> undef)
   %v48 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v45, <32 x i32> undef)
   %v49 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v46, <32 x i32> undef)
-  %v50 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v47, <32 x i32> zeroinitializer)
-  %v51 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v48, <32 x i32> zeroinitializer)
-  %v52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v49, <32 x i32> zeroinitializer)
-  %v53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v52, <32 x i32> undef, <32 x i32> %v46)
+  %v50 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v47, <32 x i32> zeroinitializer)
+  %v51 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v48, <32 x i32> zeroinitializer)
+  %v52 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v49, <32 x i32> zeroinitializer)
+  %v53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v52, <32 x i32> undef, <32 x i32> %v46)
   %v54 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v42, <64 x i32> undef, i32 16843009)
   %v55 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v53, <32 x i32> undef)
   %v56 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v54, <64 x i32> %v55, i32 16843009)
-  %v57 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v50, <32 x i32> %v43, <32 x i32> %v0)
-  %v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v51, <32 x i32> %v57, <32 x i32> %v0)
-  %v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v58, <32 x i32> %v0)
-  %v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v52, <32 x i32> %v59, <32 x i32> %v0)
-  %v61 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+  %v57 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v50, <32 x i32> %v43, <32 x i32> %v0)
+  %v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v51, <32 x i32> %v57, <32 x i32> %v0)
+  %v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v58, <32 x i32> %v0)
+  %v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v52, <32 x i32> %v59, <32 x i32> %v0)
+  %v61 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
   %v62 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v56, <64 x i32> undef, i32 16843009)
   %v63 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v62, <64 x i32> zeroinitializer, i32 16843009)
-  %v64 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v60, <32 x i32> %v0)
-  %v65 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v61, <32 x i32> %v64, <32 x i32> %v0)
-  %v66 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v65, <32 x i32> %v0)
-  %v67 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v66, <32 x i32> %v0)
+  %v64 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v60, <32 x i32> %v0)
+  %v65 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v61, <32 x i32> %v64, <32 x i32> %v0)
+  %v66 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v65, <32 x i32> %v0)
+  %v67 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v66, <32 x i32> %v0)
   %v68 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> undef, <32 x i32> %v67, <32 x i32> %v1, i32 3)
   %v69 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> %v68, <32 x i32> %v67, <32 x i32> %v2, i32 4)
   %v70 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> %v69, <32 x i32> %v67, <32 x i32> %v2, i32 5)
@@ -108,16 +108,16 @@ declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>) #1
 declare <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <64 x i32> @llvm.hexagon.V6.vmpybus.acc.128B(<64 x i32>, <32 x i32>, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32>, <32 x i32>, i32) #1

diff --git a/llvm/test/CodeGen/Hexagon/v6vect-pred2.ll b/llvm/test/CodeGen/Hexagon/v6vect-pred2.ll
index a1c155621d5f..8be372a56c8f 100644
--- a/llvm/test/CodeGen/Hexagon/v6vect-pred2.ll
+++ b/llvm/test/CodeGen/Hexagon/v6vect-pred2.ll
@@ -19,8 +19,8 @@ b0:
   %v1 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 12)
   store <16 x i32> %v1, <16 x i32>* @g2, align 64, !tbaa !0
   %v2 = load <16 x i32>, <16 x i32>* @g0, align 64, !tbaa !0
-  %v3 = bitcast <16 x i32> %v2 to <512 x i1>
-  %v4 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v3, <16 x i32> %v0, <16 x i32> %v1)
+  %v3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v2, i32 -1)
+  %v4 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v3, <16 x i32> %v0, <16 x i32> %v1)
   store <16 x i32> %v4, <16 x i32>* @g3, align 64, !tbaa !0
   ret i32 0
 }
@@ -29,7 +29,10 @@ b0:
 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
 
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll b/llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll
index 73633e6a877f..d724075a4ace 100644
--- a/llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll
+++ b/llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll
@@ -20,61 +20,61 @@ b2:                                               ; preds = %b2, %b1
   %v4 = phi <32 x i32>* [ %v3, %b1 ], [ undef, %b2 ]
   %v5 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> zeroinitializer, i32 2)
   %v6 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v5, <32 x i32> zeroinitializer)
-  %v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
-  %v8 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v7, <32 x i32> zeroinitializer)
-  %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> zeroinitializer, <32 x i32> %v8, <32 x i32> zeroinitializer)
-  %v10 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v9, <32 x i32> zeroinitializer)
+  %v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
+  %v8 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v7, <32 x i32> zeroinitializer)
+  %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> zeroinitializer, <32 x i32> %v8, <32 x i32> zeroinitializer)
+  %v10 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v9, <32 x i32> zeroinitializer)
   %v11 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> zeroinitializer, i32 4)
   %v12 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
   %v13 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
-  %v14 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
-  %v15 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> undef)
-  %v16 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v13, <32 x i32> undef)
-  %v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v10, <32 x i32> zeroinitializer)
-  %v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v14, <32 x i32> %v17, <32 x i32> zeroinitializer)
-  %v19 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v15, <32 x i32> %v18, <32 x i32> zeroinitializer)
-  %v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v16, <32 x i32> %v19, <32 x i32> zeroinitializer)
+  %v14 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
+  %v15 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> undef)
+  %v16 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v13, <32 x i32> undef)
+  %v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v10, <32 x i32> zeroinitializer)
+  %v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v14, <32 x i32> %v17, <32 x i32> zeroinitializer)
+  %v19 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v15, <32 x i32> %v18, <32 x i32> zeroinitializer)
+  %v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v16, <32 x i32> %v19, <32 x i32> zeroinitializer)
   %v21 = getelementptr inbounds i8, i8* null, i32 undef
   %v22 = bitcast i8* %v21 to <32 x i32>*
   %v23 = load <32 x i32>, <32 x i32>* %v22, align 128, !tbaa !0
   %v24 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v23, <32 x i32> zeroinitializer)
-  %v25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v24, <32 x i32> undef)
-  %v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v25, <32 x i32> %v20, <32 x i32> zeroinitializer)
-  %v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v26, <32 x i32> zeroinitializer)
-  %v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v27, <32 x i32> zeroinitializer)
-  %v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v28, <32 x i32> zeroinitializer)
-  %v30 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v29, <32 x i32> zeroinitializer)
-  %v31 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v30, <32 x i32> zeroinitializer)
-  %v32 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v31, <32 x i32> zeroinitializer)
-  %v33 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v32, <32 x i32> zeroinitializer)
-  %v34 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v33, <32 x i32> zeroinitializer)
-  %v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v34, <32 x i32> zeroinitializer)
+  %v25 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v24, <32 x i32> undef)
+  %v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v25, <32 x i32> %v20, <32 x i32> zeroinitializer)
+  %v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v26, <32 x i32> zeroinitializer)
+  %v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v27, <32 x i32> zeroinitializer)
+  %v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v28, <32 x i32> zeroinitializer)
+  %v30 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v29, <32 x i32> zeroinitializer)
+  %v31 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v30, <32 x i32> zeroinitializer)
+  %v32 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v31, <32 x i32> zeroinitializer)
+  %v33 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v32, <32 x i32> zeroinitializer)
+  %v34 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v33, <32 x i32> zeroinitializer)
+  %v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v34, <32 x i32> zeroinitializer)
   %v36 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 1)
   %v37 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 1)
   %v38 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 2)
   %v39 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v36, <32 x i32> zeroinitializer)
   %v40 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v37, <32 x i32> zeroinitializer)
   %v41 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v38, <32 x i32> zeroinitializer)
-  %v42 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v39, <32 x i32> undef)
-  %v43 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v40, <32 x i32> undef)
-  %v44 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v41, <32 x i32> undef)
-  %v45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> undef)
-  %v46 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v42, <32 x i32> %v35, <32 x i32> zeroinitializer)
-  %v47 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v43, <32 x i32> %v46, <32 x i32> zeroinitializer)
-  %v48 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v44, <32 x i32> %v47, <32 x i32> zeroinitializer)
-  %v49 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v45, <32 x i32> %v48, <32 x i32> zeroinitializer)
+  %v42 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v39, <32 x i32> undef)
+  %v43 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v40, <32 x i32> undef)
+  %v44 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v41, <32 x i32> undef)
+  %v45 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> undef)
+  %v46 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v42, <32 x i32> %v35, <32 x i32> zeroinitializer)
+  %v47 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v43, <32 x i32> %v46, <32 x i32> zeroinitializer)
+  %v48 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v44, <32 x i32> %v47, <32 x i32> zeroinitializer)
+  %v49 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v45, <32 x i32> %v48, <32 x i32> zeroinitializer)
   %v50 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 4)
   %v51 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 4)
   %v52 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
   %v53 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v50, <32 x i32> zeroinitializer)
   %v54 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v51, <32 x i32> zeroinitializer)
-  %v55 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v52, <32 x i32> undef)
-  %v56 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v53, <32 x i32> undef)
-  %v57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v54, <32 x i32> undef)
-  %v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v49, <32 x i32> zeroinitializer)
-  %v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v55, <32 x i32> %v58, <32 x i32> zeroinitializer)
-  %v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v56, <32 x i32> %v59, <32 x i32> zeroinitializer)
-  %v61 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v57, <32 x i32> %v60, <32 x i32> zeroinitializer)
+  %v55 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v52, <32 x i32> undef)
+  %v56 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v53, <32 x i32> undef)
+  %v57 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v54, <32 x i32> undef)
+  %v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v49, <32 x i32> zeroinitializer)
+  %v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v55, <32 x i32> %v58, <32 x i32> zeroinitializer)
+  %v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v56, <32 x i32> %v59, <32 x i32> zeroinitializer)
+  %v61 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v57, <32 x i32> %v60, <32 x i32> zeroinitializer)
   %v62 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> zeroinitializer, <32 x i32> %v61, <32 x i32> undef, i32 5)
   %v63 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuhv.128B(<32 x i32> undef, <32 x i32> undef)
   %v64 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v62)
@@ -100,10 +100,10 @@ declare <32 x i32> @llvm.hexagon.V6.vshuffh.128B(<32 x i32>) #1
 declare <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
 
 ; Function Attrs: nounwind readnone
 declare <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32>, <32 x i32>, i32) #1

diff --git a/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll b/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
index 08f7e33579e4..f8a12d33689b 100644
--- a/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
+++ b/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
@@ -30,20 +30,20 @@ entry:
   %call1 = tail call i32 @acquire_vector_unit(i8 zeroext 0) #3
   tail call void @init_vectors() #3
   %0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
+  %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
   %2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
-  %3 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -2147483648)
-  %4 = bitcast <512 x i1> %3 to <16 x i32>
+  %3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 -2147483648)
+  %4 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %3, i32 -1)
   store <16 x i32> %4, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
   %puts = tail call i32 @puts(i8* getelementptr inbounds ([106 x i8], [106 x i8]* @str, i32 0, i32 0))
   tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
-  %5 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -1)
-  %6 = bitcast <512 x i1> %5 to <16 x i32>
+  %5 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 -1)
+  %6 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %5, i32 -1)
   store <16 x i32> %6, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
   %puts5 = tail call i32 @puts(i8* getelementptr inbounds ([99 x i8], [99 x i8]* @str3, i32 0, i32 0))
   tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
-  %7 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 0)
-  %8 = bitcast <512 x i1> %7 to <16 x i32>
+  %7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 0)
+  %8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %7, i32 -1)
   store <16 x i32> %8, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
   %puts6 = tail call i32 @puts(i8* getelementptr inbounds ([98 x i8], [98 x i8]* @str4, i32 0, i32 0))
   tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
@@ -57,10 +57,13 @@ declare i32 @acquire_vector_unit(i8 zeroext) #1
 declare void @init_vectors() #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #2
+declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #2
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #2
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #2

diff --git a/llvm/test/CodeGen/Hexagon/vecPred2Vec.ll b/llvm/test/CodeGen/Hexagon/vecPred2Vec.ll
index c609c52f98c4..ab4f7eee1a3f 100644
--- a/llvm/test/CodeGen/Hexagon/vecPred2Vec.ll
+++ b/llvm/test/CodeGen/Hexagon/vecPred2Vec.ll
@@ -11,19 +11,18 @@ target triple = "hexagon"
 define i32 @f0() #0 {
 b0:
   %v0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
-  %v1 = bitcast <16 x i32> %v0 to <512 x i1>
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
   %v2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
-  %v3 = bitcast <16 x i32> %v2 to <512 x i1>
-  %v4 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v1, <512 x i1> %v3)
-  %v5 = bitcast <512 x i1> %v4 to <16 x i32>
+  %v3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v2, i32 -1)
+  %v4 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v1, <64 x i1> %v3)
+  %v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
   store <16 x i32> %v5, <16 x i32>* @g0, align 64, !tbaa !0
   ret i32 0
 }
 
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
-
-; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
 
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }

diff --git a/llvm/test/CodeGen/Hexagon/vect-downscale.ll b/llvm/test/CodeGen/Hexagon/vect-downscale.ll
index 9ab6b1bee2c4..ce10b74a72a6 100644
--- a/llvm/test/CodeGen/Hexagon/vect-downscale.ll
+++ b/llvm/test/CodeGen/Hexagon/vect-downscale.ll
@@ -34,7 +34,7 @@ b0:
   %v11 = mul i32 %v10, %v9
   %v12 = sub i32 %a1, %v11
   %v13 = lshr i32 %v12, 1
-  %v14 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v13)
+  %v14 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v13)
   %v15 = icmp eq i32 %a2, 0
   br i1 %v15, label %b11, label %b1
 
@@ -132,7 +132,7 @@ b9:                                               ; preds = %b8, %b7
   %v80 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32> %v78, <16 x i32> %v76, i32 1077952576)
   %v81 = tail call <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32> %v80, <16 x i32> %v79)
   %v82 = load <16 x i32>, <16 x i32>* %v68, align 64, !tbaa !2
-  %v83 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v14, <16 x i32> %v81, <16 x i32> %v82)
+  %v83 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v14, <16 x i32> %v81, <16 x i32> %v82)
   store <16 x i32> %v83, <16 x i32>* %v68, align 64, !tbaa !2
   br label %b10
 
@@ -157,7 +157,7 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
 declare i32 @llvm.hexagon.S2.ct0(i32) #1
 
 ; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
 
 ; Function Attrs: nounwind readnone
 declare <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>, <16 x i32>, i32) #1
@@ -166,7 +166,7 @@ declare <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>, <16 x i32>, i32) #1
 declare <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32>, <16 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
 
 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
 attributes #1 = { nounwind readnone }

diff --git a/llvm/test/CodeGen/Hexagon/vector-align.ll b/llvm/test/CodeGen/Hexagon/vector-align.ll
index 043839c704ae..d2e0071700ed 100644
--- a/llvm/test/CodeGen/Hexagon/vector-align.ll
+++ b/llvm/test/CodeGen/Hexagon/vector-align.ll
@@ -1,5 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b < %s \
-; RUN:    | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 
 ; Check that the store to Q6VecPredResult does not get expanded into multiple
 ; stores. There should be no memd's. This relies on the alignment specified
@@ -11,25 +10,23 @@
 
 @Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64
 
-; Function Attrs: nounwind
 define i32 @foo() #0 {
 entry:
-  %0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
-  %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -2147483648)
-  store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @Q6VecPredResult to <512 x i1>*), align 64, !tbaa !1
+  %v0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+  %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -2147483648)
+  %v2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v1, i32 -1)
+  store <16 x i32> %v2, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
   tail call void @print_vecpred(i32 64, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
   ret i32 0
 }
 
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
-
-; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
 
 declare void @print_vecpred(i32, i8*) #2
 
-attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" }
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length64b" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
 

diff --git a/llvm/test/CodeGen/Hexagon/vselect-pseudo.ll b/llvm/test/CodeGen/Hexagon/vselect-pseudo.ll
index e6be3ee69c04..58fe4ad6675a 100644
--- a/llvm/test/CodeGen/Hexagon/vselect-pseudo.ll
+++ b/llvm/test/CodeGen/Hexagon/vselect-pseudo.ll
@@ -12,7 +12,7 @@ for.body9.us:
   %cmp10.us = icmp eq i32 0, undef
   %.h63h32.2.us = select i1 %cmp10.us, <16 x i32> zeroinitializer, <16 x i32> undef
   %0 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %.h63h32.2.us, <16 x i32> undef, i32 2)
-  %1 = tail call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> undef, <16 x i32> undef, <16 x i32> %0)
+  %1 = tail call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> undef, <16 x i32> undef, <16 x i32> %0)
   %2 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %1)
   %3 = tail call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> undef, <16 x i32> %2, i32 62)
   %4 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %3)
@@ -24,7 +24,7 @@ for.body43.us.preheader:                          ; preds = %for.body9.us
 }
 
 declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #1
-declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
 declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
 declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #1


        

