[clang] b1d4746 - [Hexagon] Change HVX vector predicate types from v512/1024i1 to v64/128i1
Krzysztof Parzyszek via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 19 12:15:29 PST 2020
Author: Krzysztof Parzyszek
Date: 2020-02-19T14:14:56-06:00
New Revision: b1d47467e26142e6029e9ec7ca5c42645ffaa7bb
URL: https://github.com/llvm/llvm-project/commit/b1d47467e26142e6029e9ec7ca5c42645ffaa7bb
DIFF: https://github.com/llvm/llvm-project/commit/b1d47467e26142e6029e9ec7ca5c42645ffaa7bb.diff
LOG: [Hexagon] Change HVX vector predicate types from v512/1024i1 to v64/128i1
This commit removes the artificial types <512 x i1> and <1024 x i1>
from HVX intrinsics, and makes v512i1 and v1024i1 no longer legal on
Hexagon.
As a result, existing bitcode files that use the old types may become invalid.
* Converting between vector predicates and vector registers must now be
done explicitly, via the vandvrt/vandqrt instructions (that is, their
intrinsics). For example, in 64-byte mode:
%Q = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %V, i32 -1)
%V = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %Q, i32 -1)
The conversion intrinsics are:
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
declare <32 x i32> @llvm.hexagon.V6.vandqrt.128B(<128 x i1>, i32)
They are all pure.
* Vector predicate values cannot be loaded or stored directly; this
reflects an architectural restriction. Loads and stores of vector
predicates must instead go through vector registers, using explicit
conversions via the vandvrt/vandqrt instructions.
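A minimal IR sketch of such an indirect spill and reload (64-byte mode;
the @spill_pred/@reload_pred functions below are illustrative, not part
of this patch):

define void @spill_pred(<64 x i1> %q, <16 x i32>* %p) {
  ; Predicate -> vector register, then an ordinary vector store.
  %v = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
  store <16 x i32> %v, <16 x i32>* %p
  ret void
}

define <64 x i1> @reload_pred(<16 x i32>* %p) {
  ; Ordinary vector load, then vector register -> predicate.
  %v = load <16 x i32>, <16 x i32>* %p
  %q = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)
  ret <64 x i1> %q
}

declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)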
Added:
clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def
Modified:
clang/include/clang/Basic/BuiltinsHexagon.def
clang/include/clang/Basic/BuiltinsHexagonDep.def
clang/include/clang/module.modulemap
clang/lib/Basic/Targets/Hexagon.h
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins-hexagon-v66-128B.c
clang/test/CodeGen/builtins-hexagon-v66.c
clang/test/CodeGen/builtins-hvx128.c
clang/test/CodeGen/builtins-hvx64.c
llvm/include/llvm/IR/IntrinsicsHexagon.td
llvm/include/llvm/IR/IntrinsicsHexagonDep.td
llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
llvm/lib/Target/Hexagon/HexagonIntrinsics.td
llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
llvm/lib/Target/Hexagon/HexagonSubtarget.h
llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll
llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll
llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll
llvm/test/CodeGen/Hexagon/early-if-vecpred.ll
llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
llvm/test/CodeGen/Hexagon/hvx-byte-store.ll
llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll
llvm/test/CodeGen/Hexagon/hvx-dual-output.ll
llvm/test/CodeGen/Hexagon/inline-asm-qv.ll
llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll
llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll
llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll
llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
llvm/test/CodeGen/Hexagon/late_instr.ll
llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
llvm/test/CodeGen/Hexagon/split-vecpred.ll
llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
llvm/test/CodeGen/Hexagon/swp-sigma.ll
llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
llvm/test/CodeGen/Hexagon/v6-spill1.ll
llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
llvm/test/CodeGen/Hexagon/v60-vsel1.ll
llvm/test/CodeGen/Hexagon/v60-vsel2.ll
llvm/test/CodeGen/Hexagon/v60Intrins.ll
llvm/test/CodeGen/Hexagon/v60_sort16.ll
llvm/test/CodeGen/Hexagon/v60small.ll
llvm/test/CodeGen/Hexagon/v62-inlasm4.ll
llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll
llvm/test/CodeGen/Hexagon/v6vect-pred2.ll
llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll
llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
llvm/test/CodeGen/Hexagon/vecPred2Vec.ll
llvm/test/CodeGen/Hexagon/vect-downscale.ll
llvm/test/CodeGen/Hexagon/vector-align.ll
llvm/test/CodeGen/Hexagon/vselect-pseudo.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsHexagon.def b/clang/include/clang/Basic/BuiltinsHexagon.def
index 40fdbeffdf51..28aa222166f5 100644
--- a/clang/include/clang/Basic/BuiltinsHexagon.def
+++ b/clang/include/clang/Basic/BuiltinsHexagon.def
@@ -96,14 +96,14 @@ TARGET_BUILTIN(__builtin_HEXAGON_S2_storerd_pcr, "vv*iLLivC*", "", V5)
TARGET_BUILTIN(__builtin_HEXAGON_prefetch,"vv*","", V5)
TARGET_BUILTIN(__builtin_HEXAGON_A6_vminub_RdP,"LLiLLiLLi","", V62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq,"vV16iv*V16i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq,"vV16iv*V16i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq,"vV16iv*V16i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq,"vV16iv*V16i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq_128B,"vV32iv*V32i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq_128B,"vV32iv*V32i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq_128B,"vV32iv*V32i","", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq_128B,"vV32iv*V32i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq,"vV64bv*V16i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq,"vV64bv*V16i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq,"vV64bv*V16i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq,"vV64bv*V16i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq_128B,"vV128bv*V32i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq_128B,"vV128bv*V32i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq_128B,"vV128bv*V32i","", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq_128B,"vV128bv*V32i","", HVXV60)
// These are only valid on v65
diff --git a/clang/include/clang/Basic/BuiltinsHexagonDep.def b/clang/include/clang/Basic/BuiltinsHexagonDep.def
index 93fa373244d7..b694e4c35d3b 100644
--- a/clang/include/clang/Basic/BuiltinsHexagonDep.def
+++ b/clang/include/clang/Basic/BuiltinsHexagonDep.def
@@ -924,14 +924,14 @@ TARGET_BUILTIN(__builtin_HEXAGON_F2_dfmpyhh, "dddd", "", V67)
// V60 HVX Instructions.
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai, "vV16iv*V16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai_128B, "vV32iv*V32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai, "vV16iv*V16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B, "vV32iv*V32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai, "vV16iv*V16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B, "vV32iv*V32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai, "vV16iv*V16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B, "vV32iv*V32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai, "vV64bv*V16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai_128B, "vV128bv*V32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai, "vV64bv*V16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B, "vV128bv*V32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai, "vV64bv*V16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B, "vV128bv*V32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai, "vV64bv*V16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B, "vV128bv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_valignb, "V16iV16iV16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_valignb_128B, "V32iV32iV32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vlalignb, "V16iV16iV16ii", "", HVXV60)
@@ -1212,30 +1212,30 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubuhw, "V32iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubuhw_128B, "V64iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vd0, "V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vd0_128B, "V32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq_128B, "V32iV32iV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh, "V16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh_128B, "V32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh_sat, "V16iV16i", "", HVXV60)
@@ -1346,104 +1346,104 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vxor, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vxor_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vnot, "V16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vnot_128B, "V32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt, "V16iV16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_128B, "V32iV32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc, "V16iV16iV16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc_128B, "V32iV32iV32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt, "V16iV16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_128B, "V32iV32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc, "V16iV16iV16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc_128B, "V32iV32iV32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not, "V16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not_128B, "V32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n, "V16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n_128B, "V32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2, "V16ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2_128B, "V32ii", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux, "V16iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux_128B, "V32iV32iV32iV32i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap, "V32iV16iV16iV16i", "", HVXV60)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap_128B, "V64iV32iV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt, "V16iV64bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_128B, "V32iV128bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc, "V16iV16iV64bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc_128B, "V32iV32iV128bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt, "V64bV16ii", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_128B, "V128bV32ii", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc, "V64bV64bV16ii", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc_128B, "V128bV128bV32ii", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub, "V64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_128B, "V128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor, "V64bV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not, "V64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not_128B, "V128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n, "V64bV64bV64b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n_128B, "V128bV128bV128b", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2, "V64bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2_128B, "V128bi", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux, "V16iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux_128B, "V32iV128bV32iV32i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap, "V32iV64bV16iV16i", "", HVXV60)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap_128B, "V64iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxub, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxub_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vminub, "V16iV16iV16i", "", HVXV60)
@@ -1585,20 +1585,20 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub, "V16iV16ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_128B, "V32iV32ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_acc, "V16iV16iV16ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_acc_128B, "V32iV32iV32ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt, "V16iV16ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_128B, "V32iV32ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc, "V16iV16iV16ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc_128B, "V32iV32iV32ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv, "V16iV16iV16i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv_128B, "V32iV32iV32i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv, "V16iV16iV16i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv_128B, "V32iV32iV32i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2, "V16ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2_128B, "V32ii", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw, "V16iV16iV16i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw_128B, "V32iV32iV32i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh, "V16iV16iV16i", "", HVXV62)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh_128B, "V32iV32iV32i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt, "V16iV64bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_128B, "V32iV128bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc, "V16iV16iV64bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc_128B, "V32iV32iV128bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv, "V16iV64bV16i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv_128B, "V32iV128bV32i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv, "V16iV64bV16i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv_128B, "V32iV128bV32i", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2, "V64bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2_128B, "V128bi", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw, "V64bV64bV64b", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw_128B, "V128bV128bV128b", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh, "V64bV64bV64b", "", HVXV62)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh_128B, "V128bV128bV128b", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxb, "V16iV16iV16i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxb_128B, "V32iV32iV32i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vminb, "V16iV16iV16i", "", HVXV62)
@@ -1678,12 +1678,12 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermh, "vv*iiV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermh_128B, "vv*iiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhw, "vv*iiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhw_128B, "vv*iiV64i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq, "vv*V16iiiV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq_128B, "vv*V32iiiV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq, "vv*V16iiiV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq_128B, "vv*V32iiiV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq, "vv*V16iiiV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq_128B, "vv*V32iiiV64i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq, "vv*V64biiV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq_128B, "vv*V128biiV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq, "vv*V64biiV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq_128B, "vv*V128biiV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq, "vv*V64biiV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq_128B, "vv*V128biiV64i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw, "viiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_128B, "viiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh, "viiV16iV16i", "", HVXV65)
@@ -1692,22 +1692,22 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_add, "viiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_add_128B, "viiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh_add, "viiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh_add_128B, "viiV32iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq, "vV16iiiV16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq_128B, "vV32iiiV32iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq, "vV16iiiV16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq_128B, "vV32iiiV32iV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq, "vV64biiV16iV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq_128B, "vV128biiV32iV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq, "vV64biiV16iV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq_128B, "vV128biiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw, "viiV32iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_128B, "viiV64iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq, "vV16iiiV32iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq_128B, "vV32iiiV64iV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq, "vV64biiV32iV16i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq_128B, "vV128biiV64iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_add, "viiV32iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_add_128B, "viiV64iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb, "V16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb_128B, "V32iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh, "V16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh_128B, "V32iV32i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw, "V16iV16i", "", HVXV65)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw_128B, "V32iV32i", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb, "V16iV64b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb_128B, "V32iV128b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh, "V16iV64b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh_128B, "V32iV128b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw, "V16iV64b", "", HVXV65)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw_128B, "V32iV128b", "", HVXV65)
// V66 HVX Instructions.
@@ -1715,7 +1715,7 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vrotr, "V16iV16iV16i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vrotr_128B, "V32iV32iV32i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vasr_into, "V32iV32iV16iV16i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vasr_into_128B, "V64iV64iV32iV32i", "", HVXV66)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat, "V16iV16iV16iV16i", "", HVXV66)
-TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat_128B, "V32iV32iV32iV32i", "", HVXV66)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat, "V16iV16iV16iV64b", "", HVXV66)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat_128B, "V32iV32iV32iV128b", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsatdw, "V16iV16iV16i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsatdw_128B, "V32iV32iV32i", "", HVXV66)
diff --git a/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def b/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def
new file mode 100644
index 000000000000..9478a1b3fd14
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def
@@ -0,0 +1,206 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//===----------------------------------------------------------------------===//
+
+CUSTOM_BUILTIN_MAPPING(M2_mpysmi, 0)
+CUSTOM_BUILTIN_MAPPING(M2_dpmpyss_s0, 0)
+CUSTOM_BUILTIN_MAPPING(M2_dpmpyuu_s0, 0)
+CUSTOM_BUILTIN_MAPPING(M2_mpyi, 0)
+CUSTOM_BUILTIN_MAPPING(M2_mpyui, 0)
+CUSTOM_BUILTIN_MAPPING(A2_add, 0)
+CUSTOM_BUILTIN_MAPPING(A2_sub, 0)
+CUSTOM_BUILTIN_MAPPING(A2_addi, 0)
+CUSTOM_BUILTIN_MAPPING(A2_addp, 0)
+CUSTOM_BUILTIN_MAPPING(A2_subp, 0)
+CUSTOM_BUILTIN_MAPPING(A2_neg, 0)
+CUSTOM_BUILTIN_MAPPING(A2_zxtb, 0)
+CUSTOM_BUILTIN_MAPPING(A2_sxtb, 0)
+CUSTOM_BUILTIN_MAPPING(A2_zxth, 0)
+CUSTOM_BUILTIN_MAPPING(A2_sxth, 0)
+CUSTOM_BUILTIN_MAPPING(A2_and, 0)
+CUSTOM_BUILTIN_MAPPING(A2_or, 0)
+CUSTOM_BUILTIN_MAPPING(A2_xor, 0)
+CUSTOM_BUILTIN_MAPPING(A2_not, 0)
+CUSTOM_BUILTIN_MAPPING(A2_subri, 0)
+CUSTOM_BUILTIN_MAPPING(A2_andir, 0)
+CUSTOM_BUILTIN_MAPPING(A2_orir, 0)
+CUSTOM_BUILTIN_MAPPING(S2_asr_i_r, 0)
+CUSTOM_BUILTIN_MAPPING(S2_lsr_i_r, 0)
+CUSTOM_BUILTIN_MAPPING(S2_asl_i_r, 0)
+CUSTOM_BUILTIN_MAPPING(S2_asr_i_p, 0)
+CUSTOM_BUILTIN_MAPPING(S2_lsr_i_p, 0)
+CUSTOM_BUILTIN_MAPPING(S2_asl_i_p, 0)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddbq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddbq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubbq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubbq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddbnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddbnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubbnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubbnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddhq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddhq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubhq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubhq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddhnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddhnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubhnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubhnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddwnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddwnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubwnq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubwnq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandqrt, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandqrt_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandvrt, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandvrt_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqw_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgth, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgth_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqh, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqh_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqb, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_veqb_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_or, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_or_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_and, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_and_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_not, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_not_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_xor, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_xor_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_and_n, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_and_n_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_or_n, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_or_n_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vmux, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vmux_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vswap, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vswap_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddcarry, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddcarry_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vsubcarry, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vsubcarry_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandnqrt, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandvqv, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandvqv_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vandvnqv, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vandvnqv_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2, 64)
+CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_shuffeqw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_shuffeqw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_shuffeqh, 64)
+CUSTOM_BUILTIN_MAPPING(V6_shuffeqh_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermhq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermhq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermhq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermhq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqb, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqb_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqh, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqh_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqw, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vprefixqw_128B, 128)
+CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat, 64)
+CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat_128B, 128)
diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap
index f36fc6bd55a4..af1322acc289 100644
--- a/clang/include/clang/module.modulemap
+++ b/clang/include/clang/module.modulemap
@@ -39,6 +39,7 @@ module Clang_Basic {
textual header "Basic/Builtins.def"
textual header "Basic/BuiltinsHexagon.def"
textual header "Basic/BuiltinsHexagonDep.def"
+ textual header "Basic/BuiltinsHexagonMapCustomDep.def"
textual header "Basic/BuiltinsLe64.def"
textual header "Basic/BuiltinsMips.def"
textual header "Basic/BuiltinsNEON.def"
diff --git a/clang/lib/Basic/Targets/Hexagon.h b/clang/lib/Basic/Targets/Hexagon.h
index 2a72825e3c5a..f58f594b104f 100644
--- a/clang/lib/Basic/Targets/Hexagon.h
+++ b/clang/lib/Basic/Targets/Hexagon.h
@@ -57,6 +57,12 @@ class LLVM_LIBRARY_VISIBILITY HexagonTargetInfo : public TargetInfo {
LargeArrayAlign = 64;
UseBitFieldTypeAlignment = true;
ZeroLengthBitfieldBoundary = 32;
+
+ // These are the default values anyway, but explicitly make sure
+ // that the size of the boolean type is 8 bits. Bool vectors are used
+ // for modeling predicate registers in HVX, and the bool -> byte
+ // correspondence matches the HVX architecture.
+ BoolWidth = BoolAlign = 8;
}
ArrayRef<Builtin::Info> getTargetBuiltins() const override;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 401c4d8e0539..bda01c6598a0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15007,10 +15007,46 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
}
}
+static std::pair<Intrinsic::ID, unsigned>
+getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) {
+ struct Info {
+ unsigned BuiltinID;
+ Intrinsic::ID IntrinsicID;
+ unsigned VecLen;
+ };
+ Info Infos[] = {
+#define CUSTOM_BUILTIN_MAPPING(x,s) \
+ { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
+ CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
+#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
+#undef CUSTOM_BUILTIN_MAPPING
+ };
+
+ auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
+ static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
+ (void)SortOnce;
+
+ const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos),
+ Info{BuiltinID, 0, 0}, CmpInfo);
+ if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
+ return {Intrinsic::not_intrinsic, 0};
+
+ return {F->IntrinsicID, F->VecLen};
+}
+
Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
SmallVector<llvm::Value *, 4> Ops;
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ Intrinsic::ID ID;
+ unsigned VecLen;
+ std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID);
auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
// The base pointer is passed by address, so it needs to be loaded.
@@ -15099,51 +15135,41 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractValue(Result, 1);
};
+ auto V2Q = [this, VecLen] (llvm::Value *Vec) {
+ Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
+ : Intrinsic::hexagon_V6_vandvrt;
+ return Builder.CreateCall(CGM.getIntrinsic(ID),
+ {Vec, Builder.getInt32(-1)});
+ };
+ auto Q2V = [this, VecLen] (llvm::Value *Pred) {
+ Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
+ : Intrinsic::hexagon_V6_vandqrt;
+ return Builder.CreateCall(CGM.getIntrinsic(ID),
+ {Pred, Builder.getInt32(-1)});
+ };
+
switch (BuiltinID) {
+ // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
+ // and the corresponding C/C++ builtins use loads/stores to update
+ // the predicate.
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
- case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
- Address Dest = EmitPointerWithAlignment(E->getArg(2));
- unsigned Size;
- if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
- Size = 512;
- ID = Intrinsic::hexagon_V6_vaddcarry;
- } else {
- Size = 1024;
- ID = Intrinsic::hexagon_V6_vaddcarry_128B;
- }
- Dest = Builder.CreateBitCast(Dest,
- llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
- LoadInst *QLd = Builder.CreateLoad(Dest);
- Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
- llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
- llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
- Vprd->getType()->getPointerTo(0));
- Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
- return Builder.CreateExtractValue(Result, 0);
- }
+ case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
- Address Dest = EmitPointerWithAlignment(E->getArg(2));
- unsigned Size;
- if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
- Size = 512;
- ID = Intrinsic::hexagon_V6_vsubcarry;
- } else {
- Size = 1024;
- ID = Intrinsic::hexagon_V6_vsubcarry_128B;
- }
- Dest = Builder.CreateBitCast(Dest,
- llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
- LoadInst *QLd = Builder.CreateLoad(Dest);
- Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
- llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
- llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
- Vprd->getType()->getPointerTo(0));
- Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
+ // Get the type from the 0-th argument.
+ llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
+ Address PredAddr = Builder.CreateBitCast(
+ EmitPointerWithAlignment(E->getArg(2)), VecType->getPointerTo(0));
+ llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
+ {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
+
+ llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
+ Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
+ PredAddr.getAlignment());
return Builder.CreateExtractValue(Result, 0);
}
+
case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
@@ -15200,8 +15226,38 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
case Hexagon::BI__builtin_brev_ldd:
return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
- default:
- break;
+ default: {
+ if (ID == Intrinsic::not_intrinsic)
+ return nullptr;
+
+ auto IsVectorPredTy = [] (llvm::Type *T) {
+ return T->isVectorTy() && T->getVectorElementType()->isIntegerTy(1);
+ };
+
+ llvm::Function *IntrFn = CGM.getIntrinsic(ID);
+ llvm::FunctionType *IntrTy = IntrFn->getFunctionType();
+ SmallVector<llvm::Value*,4> Ops;
+ for (unsigned i = 0, e = IntrTy->getNumParams(); i != e; ++i) {
+ llvm::Type *T = IntrTy->getParamType(i);
+ const Expr *A = E->getArg(i);
+ if (IsVectorPredTy(T)) {
+ // There will be an implicit cast to a boolean vector. Strip it.
+ if (auto *Cast = dyn_cast<ImplicitCastExpr>(A)) {
+ if (Cast->getCastKind() == CK_BitCast)
+ A = Cast->getSubExpr();
+ }
+ Ops.push_back(V2Q(EmitScalarExpr(A)));
+ } else {
+ Ops.push_back(EmitScalarExpr(A));
+ }
+ }
+
+ llvm::Value *Call = Builder.CreateCall(IntrFn, Ops);
+ if (IsVectorPredTy(IntrTy->getReturnType()))
+ Call = Q2V(Call);
+
+ return Call;
+ } // default
} // switch
return nullptr;
diff --git a/clang/test/CodeGen/builtins-hexagon-v66-128B.c b/clang/test/CodeGen/builtins-hexagon-v66-128B.c
index a1c4786cf24c..074728ec07ec 100644
--- a/clang/test/CodeGen/builtins-hexagon-v66-128B.c
+++ b/clang/test/CodeGen/builtins-hexagon-v66-128B.c
@@ -9,7 +9,7 @@ typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256)))
__attribute__((aligned(256)));
// CHECK-LABEL: @test1
-// CHECK: call <32 x i32> @llvm.hexagon.V6.vaddcarrysat.128B(<32 x i32> %{{[0-9]+}}, <32 x i32> %{{[0-9]+}}, <1024 x i1> %{{[0-9]+}})
+// CHECK: call <32 x i32> @llvm.hexagon.V6.vaddcarrysat.128B(<32 x i32> %{{[0-9]+}}, <32 x i32> %{{[0-9]+}}, <128 x i1> %{{[0-9]+}})
HEXAGON_Vect1024 test1(void *in, void *out) {
HEXAGON_Vect1024 v1, v2;
HEXAGON_Vect1024 *p;
diff --git a/clang/test/CodeGen/builtins-hexagon-v66.c b/clang/test/CodeGen/builtins-hexagon-v66.c
index 1382f18b4faf..767f9faf7702 100644
--- a/clang/test/CodeGen/builtins-hexagon-v66.c
+++ b/clang/test/CodeGen/builtins-hexagon-v66.c
@@ -33,7 +33,7 @@ typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128)))
__attribute__((aligned(128)));
// CHECK-LABEL: @test5
-// CHECK: call <16 x i32> @llvm.hexagon.V6.vaddcarrysat(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <512 x i1> %{{[0-9]+}})
+// CHECK: call <16 x i32> @llvm.hexagon.V6.vaddcarrysat(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <64 x i1> %{{[0-9]+}})
HEXAGON_Vect512 test5(void *in, void *out) {
HEXAGON_Vect512 v1, v2;
HEXAGON_Vect512 *p;
diff --git a/clang/test/CodeGen/builtins-hvx128.c b/clang/test/CodeGen/builtins-hvx128.c
index 07d0e050ddc0..d61afdefc2ae 100644
--- a/clang/test/CodeGen/builtins-hvx128.c
+++ b/clang/test/CodeGen/builtins-hvx128.c
@@ -2,6 +2,7 @@
// RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -target-feature +hvx-length128b -emit-llvm %s -o - | FileCheck %s
void test() {
+ int q128 __attribute__((__vector_size__(128)));
int v128 __attribute__((__vector_size__(128)));
int v256 __attribute__((__vector_size__(256)));
@@ -18,33 +19,33 @@ void test() {
// CHECK: @llvm.hexagon.V6.lvsplatw.128B
__builtin_HEXAGON_V6_lvsplatw_128B(0);
// CHECK: @llvm.hexagon.V6.pred.and.128B
- __builtin_HEXAGON_V6_pred_and_128B(v128, v128);
+ __builtin_HEXAGON_V6_pred_and_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.pred.and.n.128B
- __builtin_HEXAGON_V6_pred_and_n_128B(v128, v128);
+ __builtin_HEXAGON_V6_pred_and_n_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.pred.not.128B
- __builtin_HEXAGON_V6_pred_not_128B(v128);
+ __builtin_HEXAGON_V6_pred_not_128B(q128);
// CHECK: @llvm.hexagon.V6.pred.or.128B
- __builtin_HEXAGON_V6_pred_or_128B(v128, v128);
+ __builtin_HEXAGON_V6_pred_or_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.pred.or.n.128B
- __builtin_HEXAGON_V6_pred_or_n_128B(v128, v128);
+ __builtin_HEXAGON_V6_pred_or_n_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.pred.scalar2.128B
__builtin_HEXAGON_V6_pred_scalar2_128B(0);
// CHECK: @llvm.hexagon.V6.pred.scalar2v2.128B
__builtin_HEXAGON_V6_pred_scalar2v2_128B(0);
// CHECK: @llvm.hexagon.V6.pred.xor.128B
- __builtin_HEXAGON_V6_pred_xor_128B(v128, v128);
+ __builtin_HEXAGON_V6_pred_xor_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.shuffeqh.128B
- __builtin_HEXAGON_V6_shuffeqh_128B(v128, v128);
+ __builtin_HEXAGON_V6_shuffeqh_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.shuffeqw.128B
- __builtin_HEXAGON_V6_shuffeqw_128B(v128, v128);
+ __builtin_HEXAGON_V6_shuffeqw_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai.128B
- __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(v128, 0, v128);
+ __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B
- __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(v128, 0, v128);
+ __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B
- __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(v128, 0, v128);
+ __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vS32b.qpred.ai.128B
- __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(v128, 0, v128);
+ __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vabsb.128B
__builtin_HEXAGON_V6_vabsb_128B(v128);
// CHECK: @llvm.hexagon.V6.vabsb.sat.128B
@@ -70,9 +71,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddb.dv.128B
__builtin_HEXAGON_V6_vaddb_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vaddbnq.128B
- __builtin_HEXAGON_V6_vaddbnq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vaddbnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddbq.128B
- __builtin_HEXAGON_V6_vaddbq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vaddbq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddbsat.128B
__builtin_HEXAGON_V6_vaddbsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddbsat.dv.128B
@@ -88,9 +89,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddh.dv.128B
__builtin_HEXAGON_V6_vaddh_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vaddhnq.128B
- __builtin_HEXAGON_V6_vaddhnq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vaddhnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddhq.128B
- __builtin_HEXAGON_V6_vaddhq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vaddhq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddhsat.128B
__builtin_HEXAGON_V6_vaddhsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddhsat.dv.128B
@@ -126,9 +127,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddw.dv.128B
__builtin_HEXAGON_V6_vaddw_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vaddwnq.128B
- __builtin_HEXAGON_V6_vaddwnq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vaddwnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddwq.128B
- __builtin_HEXAGON_V6_vaddwq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vaddwq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddwsat.128B
__builtin_HEXAGON_V6_vaddwsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddwsat.dv.128B
@@ -140,21 +141,21 @@ void test() {
// CHECK: @llvm.hexagon.V6.vand.128B
__builtin_HEXAGON_V6_vand_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vandnqrt.128B
- __builtin_HEXAGON_V6_vandnqrt_128B(v128, 0);
+ __builtin_HEXAGON_V6_vandnqrt_128B(q128, 0);
// CHECK: @llvm.hexagon.V6.vandnqrt.acc.128B
- __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, v128, 0);
+ __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, q128, 0);
// CHECK: @llvm.hexagon.V6.vandqrt.128B
- __builtin_HEXAGON_V6_vandqrt_128B(v128, 0);
+ __builtin_HEXAGON_V6_vandqrt_128B(q128, 0);
// CHECK: @llvm.hexagon.V6.vandqrt.acc.128B
- __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, v128, 0);
+ __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, q128, 0);
// CHECK: @llvm.hexagon.V6.vandvnqv.128B
- __builtin_HEXAGON_V6_vandvnqv_128B(v128, v128);
+ __builtin_HEXAGON_V6_vandvnqv_128B(q128, v128);
// CHECK: @llvm.hexagon.V6.vandvqv.128B
- __builtin_HEXAGON_V6_vandvqv_128B(v128, v128);
+ __builtin_HEXAGON_V6_vandvqv_128B(q128, v128);
// CHECK: @llvm.hexagon.V6.vandvrt.128B
__builtin_HEXAGON_V6_vandvrt_128B(v128, 0);
// CHECK: @llvm.hexagon.V6.vandvrt.acc.128B
- __builtin_HEXAGON_V6_vandvrt_acc_128B(v128, v128, 0);
+ __builtin_HEXAGON_V6_vandvrt_acc_128B(q128, v128, 0);
// CHECK: @llvm.hexagon.V6.vaslh.128B
__builtin_HEXAGON_V6_vaslh_128B(v128, 0);
// CHECK: @llvm.hexagon.V6.vaslh.acc.128B
@@ -296,87 +297,87 @@ void test() {
// CHECK: @llvm.hexagon.V6.veqb.128B
__builtin_HEXAGON_V6_veqb_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.veqb.and.128B
- __builtin_HEXAGON_V6_veqb_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqb_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqb.or.128B
- __builtin_HEXAGON_V6_veqb_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqb_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqb.xor.128B
- __builtin_HEXAGON_V6_veqb_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqb_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqh.128B
__builtin_HEXAGON_V6_veqh_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.veqh.and.128B
- __builtin_HEXAGON_V6_veqh_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqh_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqh.or.128B
- __builtin_HEXAGON_V6_veqh_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqh_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqh.xor.128B
- __builtin_HEXAGON_V6_veqh_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqh_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqw.128B
__builtin_HEXAGON_V6_veqw_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.veqw.and.128B
- __builtin_HEXAGON_V6_veqw_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqw_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqw.or.128B
- __builtin_HEXAGON_V6_veqw_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqw_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqw.xor.128B
- __builtin_HEXAGON_V6_veqw_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_veqw_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgathermh.128B
__builtin_HEXAGON_V6_vgathermh_128B(0, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermhq.128B
- __builtin_HEXAGON_V6_vgathermhq_128B(0, v128, 0, 0, v128);
+ __builtin_HEXAGON_V6_vgathermhq_128B(0, q128, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermhw.128B
__builtin_HEXAGON_V6_vgathermhw_128B(0, 0, 0, v256);
// CHECK: @llvm.hexagon.V6.vgathermhwq.128B
- __builtin_HEXAGON_V6_vgathermhwq_128B(0, v128, 0, 0, v256);
+ __builtin_HEXAGON_V6_vgathermhwq_128B(0, q128, 0, 0, v256);
// CHECK: @llvm.hexagon.V6.vgathermw.128B
__builtin_HEXAGON_V6_vgathermw_128B(0, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermwq.128B
- __builtin_HEXAGON_V6_vgathermwq_128B(0, v128, 0, 0, v128);
+ __builtin_HEXAGON_V6_vgathermwq_128B(0, q128, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgtb.128B
__builtin_HEXAGON_V6_vgtb_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtb.and.128B
- __builtin_HEXAGON_V6_vgtb_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtb_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtb.or.128B
- __builtin_HEXAGON_V6_vgtb_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtb_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtb.xor.128B
- __builtin_HEXAGON_V6_vgtb_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtb_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgth.128B
__builtin_HEXAGON_V6_vgth_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgth.and.128B
- __builtin_HEXAGON_V6_vgth_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgth_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgth.or.128B
- __builtin_HEXAGON_V6_vgth_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgth_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgth.xor.128B
- __builtin_HEXAGON_V6_vgth_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgth_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtub.128B
__builtin_HEXAGON_V6_vgtub_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtub.and.128B
- __builtin_HEXAGON_V6_vgtub_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtub_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtub.or.128B
- __builtin_HEXAGON_V6_vgtub_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtub_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtub.xor.128B
- __builtin_HEXAGON_V6_vgtub_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtub_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuh.128B
__builtin_HEXAGON_V6_vgtuh_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuh.and.128B
- __builtin_HEXAGON_V6_vgtuh_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtuh_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuh.or.128B
- __builtin_HEXAGON_V6_vgtuh_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtuh_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuh.xor.128B
- __builtin_HEXAGON_V6_vgtuh_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtuh_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuw.128B
__builtin_HEXAGON_V6_vgtuw_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuw.and.128B
- __builtin_HEXAGON_V6_vgtuw_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtuw_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuw.or.128B
- __builtin_HEXAGON_V6_vgtuw_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtuw_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuw.xor.128B
- __builtin_HEXAGON_V6_vgtuw_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtuw_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtw.128B
__builtin_HEXAGON_V6_vgtw_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtw.and.128B
- __builtin_HEXAGON_V6_vgtw_and_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtw_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtw.or.128B
- __builtin_HEXAGON_V6_vgtw_or_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtw_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtw.xor.128B
- __builtin_HEXAGON_V6_vgtw_xor_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vgtw_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vinsertwr.128B
__builtin_HEXAGON_V6_vinsertwr_128B(v128, 0);
// CHECK: @llvm.hexagon.V6.vlalignb.128B
@@ -416,13 +417,13 @@ void test() {
// CHECK: @llvm.hexagon.V6.vlutvwhi.128B
__builtin_HEXAGON_V6_vlutvwhi_128B(v128, v128, 0);
// CHECK: @llvm.hexagon.V6.vmaskedstorenq.128B
- __builtin_HEXAGON_V6_vmaskedstorenq_128B(v128, 0, v128);
+ __builtin_HEXAGON_V6_vmaskedstorenq_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vmaskedstorentnq.128B
- __builtin_HEXAGON_V6_vmaskedstorentnq_128B(v128, 0, v128);
+ __builtin_HEXAGON_V6_vmaskedstorentnq_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vmaskedstorentq.128B
- __builtin_HEXAGON_V6_vmaskedstorentq_128B(v128, 0, v128);
+ __builtin_HEXAGON_V6_vmaskedstorentq_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vmaskedstoreq.128B
- __builtin_HEXAGON_V6_vmaskedstoreq_128B(v128, 0, v128);
+ __builtin_HEXAGON_V6_vmaskedstoreq_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vmaxb.128B
__builtin_HEXAGON_V6_vmaxb_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vmaxh.128B
@@ -566,7 +567,7 @@ void test() {
// CHECK: @llvm.hexagon.V6.vmpyuhv.acc.128B
__builtin_HEXAGON_V6_vmpyuhv_acc_128B(v256, v128, v128);
// CHECK: @llvm.hexagon.V6.vmux.128B
- __builtin_HEXAGON_V6_vmux_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vmux_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vnavgb.128B
__builtin_HEXAGON_V6_vnavgb_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vnavgh.128B
@@ -602,11 +603,11 @@ void test() {
// CHECK: @llvm.hexagon.V6.vpopcounth.128B
__builtin_HEXAGON_V6_vpopcounth_128B(v128);
// CHECK: @llvm.hexagon.V6.vprefixqb.128B
- __builtin_HEXAGON_V6_vprefixqb_128B(v128);
+ __builtin_HEXAGON_V6_vprefixqb_128B(q128);
// CHECK: @llvm.hexagon.V6.vprefixqh.128B
- __builtin_HEXAGON_V6_vprefixqh_128B(v128);
+ __builtin_HEXAGON_V6_vprefixqh_128B(q128);
// CHECK: @llvm.hexagon.V6.vprefixqw.128B
- __builtin_HEXAGON_V6_vprefixqw_128B(v128);
+ __builtin_HEXAGON_V6_vprefixqw_128B(q128);
// CHECK: @llvm.hexagon.V6.vrdelta.128B
__builtin_HEXAGON_V6_vrdelta_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vrmpybub.rtt.128B
@@ -676,19 +677,19 @@ void test() {
// CHECK: @llvm.hexagon.V6.vscattermh.add.128B
__builtin_HEXAGON_V6_vscattermh_add_128B(0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vscattermhq.128B
- __builtin_HEXAGON_V6_vscattermhq_128B(v128, 0, 0, v128, v128);
+ __builtin_HEXAGON_V6_vscattermhq_128B(q128, 0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vscattermhw.128B
__builtin_HEXAGON_V6_vscattermhw_128B(0, 0, v256, v128);
// CHECK: @llvm.hexagon.V6.vscattermhw.add.128B
__builtin_HEXAGON_V6_vscattermhw_add_128B(0, 0, v256, v128);
// CHECK: @llvm.hexagon.V6.vscattermhwq.128B
- __builtin_HEXAGON_V6_vscattermhwq_128B(v128, 0, 0, v256, v128);
+ __builtin_HEXAGON_V6_vscattermhwq_128B(q128, 0, 0, v256, v128);
// CHECK: @llvm.hexagon.V6.vscattermw.128B
__builtin_HEXAGON_V6_vscattermw_128B(0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vscattermw.add.128B
__builtin_HEXAGON_V6_vscattermw_add_128B(0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vscattermwq.128B
- __builtin_HEXAGON_V6_vscattermwq_128B(v128, 0, 0, v128, v128);
+ __builtin_HEXAGON_V6_vscattermwq_128B(q128, 0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vsh.128B
__builtin_HEXAGON_V6_vsh_128B(v128);
// CHECK: @llvm.hexagon.V6.vshufeh.128B
@@ -714,9 +715,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubb.dv.128B
__builtin_HEXAGON_V6_vsubb_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vsubbnq.128B
- __builtin_HEXAGON_V6_vsubbnq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vsubbnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubbq.128B
- __builtin_HEXAGON_V6_vsubbq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vsubbq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubbsat.128B
__builtin_HEXAGON_V6_vsubbsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubbsat.dv.128B
@@ -728,9 +729,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubh.dv.128B
__builtin_HEXAGON_V6_vsubh_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vsubhnq.128B
- __builtin_HEXAGON_V6_vsubhnq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vsubhnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubhq.128B
- __builtin_HEXAGON_V6_vsubhq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vsubhq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubhsat.128B
__builtin_HEXAGON_V6_vsubhsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubhsat.dv.128B
@@ -760,15 +761,15 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubw.dv.128B
__builtin_HEXAGON_V6_vsubw_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vsubwnq.128B
- __builtin_HEXAGON_V6_vsubwnq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vsubwnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubwq.128B
- __builtin_HEXAGON_V6_vsubwq_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vsubwq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubwsat.128B
__builtin_HEXAGON_V6_vsubwsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubwsat.dv.128B
__builtin_HEXAGON_V6_vsubwsat_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vswap.128B
- __builtin_HEXAGON_V6_vswap_128B(v128, v128, v128);
+ __builtin_HEXAGON_V6_vswap_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vtmpyb.128B
__builtin_HEXAGON_V6_vtmpyb_128B(v256, 0);
// CHECK: @llvm.hexagon.V6.vtmpyb.acc.128B
diff --git a/clang/test/CodeGen/builtins-hvx64.c b/clang/test/CodeGen/builtins-hvx64.c
index 5a53296e7276..27d39990adb5 100644
--- a/clang/test/CodeGen/builtins-hvx64.c
+++ b/clang/test/CodeGen/builtins-hvx64.c
@@ -2,6 +2,7 @@
// RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -target-feature +hvx-length64b -emit-llvm %s -o - | FileCheck %s
void test() {
+ int q64 __attribute__((__vector_size__(64)));
int v64 __attribute__((__vector_size__(64)));
int v128 __attribute__((__vector_size__(128)));
@@ -18,33 +19,33 @@ void test() {
// CHECK: @llvm.hexagon.V6.lvsplatw
__builtin_HEXAGON_V6_lvsplatw(0);
// CHECK: @llvm.hexagon.V6.pred.and
- __builtin_HEXAGON_V6_pred_and(v64, v64);
+ __builtin_HEXAGON_V6_pred_and(q64, q64);
// CHECK: @llvm.hexagon.V6.pred.and.n
- __builtin_HEXAGON_V6_pred_and_n(v64, v64);
+ __builtin_HEXAGON_V6_pred_and_n(q64, q64);
// CHECK: @llvm.hexagon.V6.pred.not
- __builtin_HEXAGON_V6_pred_not(v64);
+ __builtin_HEXAGON_V6_pred_not(q64);
// CHECK: @llvm.hexagon.V6.pred.or
- __builtin_HEXAGON_V6_pred_or(v64, v64);
+ __builtin_HEXAGON_V6_pred_or(q64, q64);
// CHECK: @llvm.hexagon.V6.pred.or.n
- __builtin_HEXAGON_V6_pred_or_n(v64, v64);
+ __builtin_HEXAGON_V6_pred_or_n(q64, q64);
// CHECK: @llvm.hexagon.V6.pred.scalar2
__builtin_HEXAGON_V6_pred_scalar2(0);
// CHECK: @llvm.hexagon.V6.pred.scalar2v2
__builtin_HEXAGON_V6_pred_scalar2v2(0);
// CHECK: @llvm.hexagon.V6.pred.xor
- __builtin_HEXAGON_V6_pred_xor(v64, v64);
+ __builtin_HEXAGON_V6_pred_xor(q64, q64);
// CHECK: @llvm.hexagon.V6.shuffeqh
- __builtin_HEXAGON_V6_shuffeqh(v64, v64);
+ __builtin_HEXAGON_V6_shuffeqh(q64, q64);
// CHECK: @llvm.hexagon.V6.shuffeqw
- __builtin_HEXAGON_V6_shuffeqw(v64, v64);
+ __builtin_HEXAGON_V6_shuffeqw(q64, q64);
// CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai
- __builtin_HEXAGON_V6_vS32b_nqpred_ai(v64, 0, v64);
+ __builtin_HEXAGON_V6_vS32b_nqpred_ai(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai
- __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(v64, 0, v64);
+ __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai
- __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(v64, 0, v64);
+ __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vS32b.qpred.ai
- __builtin_HEXAGON_V6_vS32b_qpred_ai(v64, 0, v64);
+ __builtin_HEXAGON_V6_vS32b_qpred_ai(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vabsb
__builtin_HEXAGON_V6_vabsb(v64);
// CHECK: @llvm.hexagon.V6.vabsb.sat
@@ -70,9 +71,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddb.dv
__builtin_HEXAGON_V6_vaddb_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddbnq
- __builtin_HEXAGON_V6_vaddbnq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vaddbnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddbq
- __builtin_HEXAGON_V6_vaddbq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vaddbq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddbsat
__builtin_HEXAGON_V6_vaddbsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vaddbsat.dv
@@ -88,9 +89,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddh.dv
__builtin_HEXAGON_V6_vaddh_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddhnq
- __builtin_HEXAGON_V6_vaddhnq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vaddhnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddhq
- __builtin_HEXAGON_V6_vaddhq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vaddhq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddhsat
__builtin_HEXAGON_V6_vaddhsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vaddhsat.dv
@@ -126,9 +127,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddw.dv
__builtin_HEXAGON_V6_vaddw_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddwnq
- __builtin_HEXAGON_V6_vaddwnq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vaddwnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddwq
- __builtin_HEXAGON_V6_vaddwq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vaddwq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddwsat
__builtin_HEXAGON_V6_vaddwsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vaddwsat.dv
@@ -140,21 +141,21 @@ void test() {
// CHECK: @llvm.hexagon.V6.vand
__builtin_HEXAGON_V6_vand(v64, v64);
// CHECK: @llvm.hexagon.V6.vandnqrt
- __builtin_HEXAGON_V6_vandnqrt(v64, 0);
+ __builtin_HEXAGON_V6_vandnqrt(q64, 0);
// CHECK: @llvm.hexagon.V6.vandnqrt.acc
- __builtin_HEXAGON_V6_vandnqrt_acc(v64, v64, 0);
+ __builtin_HEXAGON_V6_vandnqrt_acc(v64, q64, 0);
// CHECK: @llvm.hexagon.V6.vandqrt
- __builtin_HEXAGON_V6_vandqrt(v64, 0);
+ __builtin_HEXAGON_V6_vandqrt(q64, 0);
// CHECK: @llvm.hexagon.V6.vandqrt.acc
- __builtin_HEXAGON_V6_vandqrt_acc(v64, v64, 0);
+ __builtin_HEXAGON_V6_vandqrt_acc(v64, q64, 0);
// CHECK: @llvm.hexagon.V6.vandvnqv
- __builtin_HEXAGON_V6_vandvnqv(v64, v64);
+ __builtin_HEXAGON_V6_vandvnqv(q64, v64);
// CHECK: @llvm.hexagon.V6.vandvqv
- __builtin_HEXAGON_V6_vandvqv(v64, v64);
+ __builtin_HEXAGON_V6_vandvqv(q64, v64);
// CHECK: @llvm.hexagon.V6.vandvrt
__builtin_HEXAGON_V6_vandvrt(v64, 0);
// CHECK: @llvm.hexagon.V6.vandvrt.acc
- __builtin_HEXAGON_V6_vandvrt_acc(v64, v64, 0);
+ __builtin_HEXAGON_V6_vandvrt_acc(q64, v64, 0);
// CHECK: @llvm.hexagon.V6.vaslh
__builtin_HEXAGON_V6_vaslh(v64, 0);
// CHECK: @llvm.hexagon.V6.vaslh.acc
@@ -296,87 +297,87 @@ void test() {
// CHECK: @llvm.hexagon.V6.veqb
__builtin_HEXAGON_V6_veqb(v64, v64);
// CHECK: @llvm.hexagon.V6.veqb.and
- __builtin_HEXAGON_V6_veqb_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqb_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqb.or
- __builtin_HEXAGON_V6_veqb_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqb_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqb.xor
- __builtin_HEXAGON_V6_veqb_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqb_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqh
__builtin_HEXAGON_V6_veqh(v64, v64);
// CHECK: @llvm.hexagon.V6.veqh.and
- __builtin_HEXAGON_V6_veqh_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqh_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqh.or
- __builtin_HEXAGON_V6_veqh_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqh_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqh.xor
- __builtin_HEXAGON_V6_veqh_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqh_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqw
__builtin_HEXAGON_V6_veqw(v64, v64);
// CHECK: @llvm.hexagon.V6.veqw.and
- __builtin_HEXAGON_V6_veqw_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqw_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqw.or
- __builtin_HEXAGON_V6_veqw_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqw_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqw.xor
- __builtin_HEXAGON_V6_veqw_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_veqw_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgathermh
__builtin_HEXAGON_V6_vgathermh(0, 0, 0, v64);
// CHECK: @llvm.hexagon.V6.vgathermhq
- __builtin_HEXAGON_V6_vgathermhq(0, v64, 0, 0, v64);
+ __builtin_HEXAGON_V6_vgathermhq(0, q64, 0, 0, v64);
// CHECK: @llvm.hexagon.V6.vgathermhw
__builtin_HEXAGON_V6_vgathermhw(0, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermhwq
- __builtin_HEXAGON_V6_vgathermhwq(0, v64, 0, 0, v128);
+ __builtin_HEXAGON_V6_vgathermhwq(0, q64, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermw
__builtin_HEXAGON_V6_vgathermw(0, 0, 0, v64);
// CHECK: @llvm.hexagon.V6.vgathermwq
- __builtin_HEXAGON_V6_vgathermwq(0, v64, 0, 0, v64);
+ __builtin_HEXAGON_V6_vgathermwq(0, q64, 0, 0, v64);
// CHECK: @llvm.hexagon.V6.vgtb
__builtin_HEXAGON_V6_vgtb(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtb.and
- __builtin_HEXAGON_V6_vgtb_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtb_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtb.or
- __builtin_HEXAGON_V6_vgtb_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtb_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtb.xor
- __builtin_HEXAGON_V6_vgtb_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtb_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgth
__builtin_HEXAGON_V6_vgth(v64, v64);
// CHECK: @llvm.hexagon.V6.vgth.and
- __builtin_HEXAGON_V6_vgth_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgth_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgth.or
- __builtin_HEXAGON_V6_vgth_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgth_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgth.xor
- __builtin_HEXAGON_V6_vgth_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgth_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtub
__builtin_HEXAGON_V6_vgtub(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtub.and
- __builtin_HEXAGON_V6_vgtub_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtub_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtub.or
- __builtin_HEXAGON_V6_vgtub_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtub_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtub.xor
- __builtin_HEXAGON_V6_vgtub_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtub_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuh
__builtin_HEXAGON_V6_vgtuh(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuh.and
- __builtin_HEXAGON_V6_vgtuh_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtuh_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuh.or
- __builtin_HEXAGON_V6_vgtuh_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtuh_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuh.xor
- __builtin_HEXAGON_V6_vgtuh_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtuh_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuw
__builtin_HEXAGON_V6_vgtuw(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuw.and
- __builtin_HEXAGON_V6_vgtuw_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtuw_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuw.or
- __builtin_HEXAGON_V6_vgtuw_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtuw_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuw.xor
- __builtin_HEXAGON_V6_vgtuw_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtuw_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtw
__builtin_HEXAGON_V6_vgtw(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtw.and
- __builtin_HEXAGON_V6_vgtw_and(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtw_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtw.or
- __builtin_HEXAGON_V6_vgtw_or(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtw_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtw.xor
- __builtin_HEXAGON_V6_vgtw_xor(v64, v64, v64);
+ __builtin_HEXAGON_V6_vgtw_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vinsertwr
__builtin_HEXAGON_V6_vinsertwr(v64, 0);
// CHECK: @llvm.hexagon.V6.vlalignb
@@ -416,13 +417,13 @@ void test() {
// CHECK: @llvm.hexagon.V6.vlutvwhi
__builtin_HEXAGON_V6_vlutvwhi(v64, v64, 0);
// CHECK: @llvm.hexagon.V6.vmaskedstorenq
- __builtin_HEXAGON_V6_vmaskedstorenq(v64, 0, v64);
+ __builtin_HEXAGON_V6_vmaskedstorenq(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vmaskedstorentnq
- __builtin_HEXAGON_V6_vmaskedstorentnq(v64, 0, v64);
+ __builtin_HEXAGON_V6_vmaskedstorentnq(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vmaskedstorentq
- __builtin_HEXAGON_V6_vmaskedstorentq(v64, 0, v64);
+ __builtin_HEXAGON_V6_vmaskedstorentq(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vmaskedstoreq
- __builtin_HEXAGON_V6_vmaskedstoreq(v64, 0, v64);
+ __builtin_HEXAGON_V6_vmaskedstoreq(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vmaxb
__builtin_HEXAGON_V6_vmaxb(v64, v64);
// CHECK: @llvm.hexagon.V6.vmaxh
@@ -566,7 +567,7 @@ void test() {
// CHECK: @llvm.hexagon.V6.vmpyuhv.acc
__builtin_HEXAGON_V6_vmpyuhv_acc(v128, v64, v64);
// CHECK: @llvm.hexagon.V6.vmux
- __builtin_HEXAGON_V6_vmux(v64, v64, v64);
+ __builtin_HEXAGON_V6_vmux(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vnavgb
__builtin_HEXAGON_V6_vnavgb(v64, v64);
// CHECK: @llvm.hexagon.V6.vnavgh
@@ -602,11 +603,11 @@ void test() {
// CHECK: @llvm.hexagon.V6.vpopcounth
__builtin_HEXAGON_V6_vpopcounth(v64);
// CHECK: @llvm.hexagon.V6.vprefixqb
- __builtin_HEXAGON_V6_vprefixqb(v64);
+ __builtin_HEXAGON_V6_vprefixqb(q64);
// CHECK: @llvm.hexagon.V6.vprefixqh
- __builtin_HEXAGON_V6_vprefixqh(v64);
+ __builtin_HEXAGON_V6_vprefixqh(q64);
// CHECK: @llvm.hexagon.V6.vprefixqw
- __builtin_HEXAGON_V6_vprefixqw(v64);
+ __builtin_HEXAGON_V6_vprefixqw(q64);
// CHECK: @llvm.hexagon.V6.vrdelta
__builtin_HEXAGON_V6_vrdelta(v64, v64);
// CHECK: @llvm.hexagon.V6.vrmpybub.rtt
@@ -676,19 +677,19 @@ void test() {
// CHECK: @llvm.hexagon.V6.vscattermh.add
__builtin_HEXAGON_V6_vscattermh_add(0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vscattermhq
- __builtin_HEXAGON_V6_vscattermhq(v64, 0, 0, v64, v64);
+ __builtin_HEXAGON_V6_vscattermhq(q64, 0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vscattermhw
__builtin_HEXAGON_V6_vscattermhw(0, 0, v128, v64);
// CHECK: @llvm.hexagon.V6.vscattermhw.add
__builtin_HEXAGON_V6_vscattermhw_add(0, 0, v128, v64);
// CHECK: @llvm.hexagon.V6.vscattermhwq
- __builtin_HEXAGON_V6_vscattermhwq(v64, 0, 0, v128, v64);
+ __builtin_HEXAGON_V6_vscattermhwq(q64, 0, 0, v128, v64);
// CHECK: @llvm.hexagon.V6.vscattermw
__builtin_HEXAGON_V6_vscattermw(0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vscattermw.add
__builtin_HEXAGON_V6_vscattermw_add(0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vscattermwq
- __builtin_HEXAGON_V6_vscattermwq(v64, 0, 0, v64, v64);
+ __builtin_HEXAGON_V6_vscattermwq(q64, 0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vsh
__builtin_HEXAGON_V6_vsh(v64);
// CHECK: @llvm.hexagon.V6.vshufeh
@@ -714,9 +715,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubb.dv
__builtin_HEXAGON_V6_vsubb_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubbnq
- __builtin_HEXAGON_V6_vsubbnq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vsubbnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubbq
- __builtin_HEXAGON_V6_vsubbq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vsubbq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubbsat
__builtin_HEXAGON_V6_vsubbsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vsubbsat.dv
@@ -728,9 +729,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubh.dv
__builtin_HEXAGON_V6_vsubh_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubhnq
- __builtin_HEXAGON_V6_vsubhnq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vsubhnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubhq
- __builtin_HEXAGON_V6_vsubhq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vsubhq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubhsat
__builtin_HEXAGON_V6_vsubhsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vsubhsat.dv
@@ -760,15 +761,15 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubw.dv
__builtin_HEXAGON_V6_vsubw_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubwnq
- __builtin_HEXAGON_V6_vsubwnq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vsubwnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubwq
- __builtin_HEXAGON_V6_vsubwq(v64, v64, v64);
+ __builtin_HEXAGON_V6_vsubwq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubwsat
__builtin_HEXAGON_V6_vsubwsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vsubwsat.dv
__builtin_HEXAGON_V6_vsubwsat_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vswap
- __builtin_HEXAGON_V6_vswap(v64, v64, v64);
+ __builtin_HEXAGON_V6_vswap(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vtmpyb
__builtin_HEXAGON_V6_vtmpyb(v128, 0);
// CHECK: @llvm.hexagon.V6.vtmpyb.acc
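[Editor's note, not part of the diff: in 64-byte mode, predicate producers and consumers can be chained directly; a sketch using only builtins that appear in the test above.]

void store_if_gt(void) {
  int v64 __attribute__((__vector_size__(64)));
  int q64 __attribute__((__vector_size__(64)));

  // Combine a comparison predicate with q64 and use the result as the
  // predicate operand of the predicated vector store vS32b_qpred_ai
  // (operands, as in the test above: predicate, address, vector).
  __builtin_HEXAGON_V6_vS32b_qpred_ai(
      __builtin_HEXAGON_V6_pred_and(__builtin_HEXAGON_V6_vgtw(v64, v64), q64),
      0, v64);
}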
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td
index f82cac156eca..3e0e8fae7b93 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -258,44 +258,22 @@ Hexagon_v64i32_v64i32v32i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
// Masked vector stores
//
-//
-// Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
-// tag: V6_vS32b_qpred_ai
-class Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
- [IntrArgMemOnly]>;
-
-//
-// Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
-// tag: V6_vS32b_qpred_ai_128B
-class Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
- [IntrArgMemOnly]>;
-
-def int_hexagon_V6_vmaskedstoreq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">;
-
-def int_hexagon_V6_vmaskedstorenq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorenq">;
-
-def int_hexagon_V6_vmaskedstorentq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentq">;
-
-def int_hexagon_V6_vmaskedstorentnq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentnq">;
-
-def int_hexagon_V6_vmaskedstoreq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstoreq_128B">;
-
-def int_hexagon_V6_vmaskedstorenq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorenq_128B">;
-
-def int_hexagon_V6_vmaskedstorentq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentq_128B">;
-
-def int_hexagon_V6_vmaskedstorentnq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentnq_128B">;
+class Hexagon_custom_vms_Intrinsic
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_v64i1_ty,llvm_ptr_ty,llvm_v16i32_ty], [IntrWriteMem]>;
+
+class Hexagon_custom_vms_Intrinsic_128B
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_v128i1_ty,llvm_ptr_ty,llvm_v32i32_ty], [IntrWriteMem]>;
+
+def int_hexagon_V6_vmaskedstoreq: Hexagon_custom_vms_Intrinsic;
+def int_hexagon_V6_vmaskedstorenq: Hexagon_custom_vms_Intrinsic;
+def int_hexagon_V6_vmaskedstorentq: Hexagon_custom_vms_Intrinsic;
+def int_hexagon_V6_vmaskedstorentnq: Hexagon_custom_vms_Intrinsic;
+
+def int_hexagon_V6_vmaskedstoreq_128B: Hexagon_custom_vms_Intrinsic_128B;
+def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B;
+def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B;
+def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B;
include "llvm/IR/IntrinsicsHexagonDep.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
index e81ac9ba8519..67a06f5c06f4 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
@@ -548,17 +548,17 @@ class Hexagon_v64i32_v64i32v32i32_Intrinsic<string GCCIntSuffix,
intr_properties>;
// tag : V6_vS32b_qpred_ai
-class Hexagon__v512i1ptrv16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v64i1ptrv16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_v64i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vS32b_qpred_ai
-class Hexagon__v1024i1ptrv32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_v128i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_valignb
@@ -660,31 +660,31 @@ class Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<string GCCIntSuffix,
intr_properties>;
// tag : V6_vaddcarrysat
-class Hexagon_v16i32_v16i32v16i32v512i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v16i32v16i32v64i1_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v64i1_ty],
intr_properties>;
// tag : V6_vaddcarrysat
-class Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v32i32v32i32v128i1_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty],
intr_properties>;
// tag : V6_vaddcarry
-class Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic<
+class Hexagon_custom_v16i32v64i1_v16i32v16i32v64i1_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
: Hexagon_NonGCC_Intrinsic<
- [llvm_v16i32_ty,llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
+ [llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v64i1_ty],
intr_properties>;
// tag : V6_vaddcarry
-class Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B<
+class Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
: Hexagon_NonGCC_Intrinsic<
- [llvm_v32i32_ty,llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
+ [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty],
intr_properties>;
// tag : V6_vaddubh
@@ -702,17 +702,17 @@ class Hexagon_v16i32__Intrinsic<string GCCIntSuffix,
intr_properties>;
// tag : V6_vaddbq
-class Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty], [llvm_v64i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vaddbq
-class Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty], [llvm_v128i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_vabsb
@@ -751,157 +751,157 @@ class Hexagon_v64i32_v64i32v32i32i32_Intrinsic<string GCCIntSuffix,
intr_properties>;
// tag : V6_vandqrt
-class Hexagon_v16i32_v512i1i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v64i1i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty], [llvm_v64i1_ty,llvm_i32_ty],
intr_properties>;
// tag : V6_vandqrt
-class Hexagon_v32i32_v1024i1i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty], [llvm_v128i1_ty,llvm_i32_ty],
intr_properties>;
// tag : V6_vandqrt_acc
-class Hexagon_v16i32_v16i32v512i1i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v64i1_ty,llvm_i32_ty],
intr_properties>;
// tag : V6_vandqrt_acc
-class Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v128i1_ty,llvm_i32_ty],
intr_properties>;
// tag : V6_vandvrt
-class Hexagon_v512i1_v16i32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v16i32i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v64i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
intr_properties>;
// tag : V6_vandvrt
-class Hexagon_v1024i1_v32i32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v32i32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v128i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
intr_properties>;
// tag : V6_vandvrt_acc
-class Hexagon_v512i1_v512i1v16i32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v64i1v16i32i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v64i1_ty], [llvm_v64i1_ty,llvm_v16i32_ty,llvm_i32_ty],
intr_properties>;
// tag : V6_vandvrt_acc
-class Hexagon_v1024i1_v1024i1v32i32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v128i1v32i32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v128i1_ty], [llvm_v128i1_ty,llvm_v32i32_ty,llvm_i32_ty],
intr_properties>;
// tag : V6_vandvqv
-class Hexagon_v16i32_v512i1v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v64i1v16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty], [llvm_v64i1_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vandvqv
-class Hexagon_v32i32_v1024i1v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty], [llvm_v128i1_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_vgtw
-class Hexagon_v512i1_v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v16i32v16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vgtw
-class Hexagon_v1024i1_v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_vgtw_and
-class Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v64i1_ty], [llvm_v64i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vgtw_and
-class Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v128i1_ty], [llvm_v128i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_pred_scalar2
-class Hexagon_v512i1_i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v64i1_ty], [llvm_i32_ty],
intr_properties>;
// tag : V6_pred_scalar2
-class Hexagon_v1024i1_i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v128i1_ty], [llvm_i32_ty],
intr_properties>;
// tag : V6_shuffeqw
-class Hexagon_v512i1_v512i1v512i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v64i1v64i1_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v64i1_ty], [llvm_v64i1_ty,llvm_v64i1_ty],
intr_properties>;
// tag : V6_shuffeqw
-class Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v128i1_ty], [llvm_v128i1_ty,llvm_v128i1_ty],
intr_properties>;
// tag : V6_pred_not
-class Hexagon_v512i1_v512i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i1_v64i1_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v64i1_ty], [llvm_v64i1_ty],
intr_properties>;
// tag : V6_pred_not
-class Hexagon_v1024i1_v1024i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v128i1_v128i1_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v128i1_ty], [llvm_v128i1_ty],
intr_properties>;
// tag : V6_vswap
-class Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v64i1v16i32v16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty], [llvm_v64i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vswap
-class Hexagon_v64i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v64i32_v128i1v32i32v32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v64i32_ty], [llvm_v128i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_vshuffvdd
@@ -982,31 +982,31 @@ class Hexagon__ptri32i32v64i32_Intrinsic<string GCCIntSuffix,
intr_properties>;
// tag : V6_vgathermwq
-class Hexagon__ptrv512i1i32i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_ptr_ty,llvm_v64i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vgathermwq
-class Hexagon__ptrv1024i1i32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_ptr_ty,llvm_v128i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_vgathermhwq
-class Hexagon__ptrv512i1i32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__ptrv64i1i32i32v32i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_ptr_ty,llvm_v64i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_vgathermhwq
-class Hexagon__ptrv1024i1i32i32v64i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__ptrv128i1i32i32v64i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v64i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_ptr_ty,llvm_v128i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v64i32_ty],
intr_properties>;
// tag : V6_vscattermw
@@ -1024,17 +1024,17 @@ class Hexagon__i32i32v32i32v32i32_Intrinsic<string GCCIntSuffix,
intr_properties>;
// tag : V6_vscattermwq
-class Hexagon__v512i1i32i32v16i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_v64i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vscattermwq
-class Hexagon__v1024i1i32i32v32i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_v128i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_vscattermhw
@@ -1052,31 +1052,31 @@ class Hexagon__i32i32v64i32v32i32_Intrinsic<string GCCIntSuffix,
intr_properties>;
// tag : V6_vscattermhwq
-class Hexagon__v512i1i32i32v32i32v16i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v64i1i32i32v32i32v16i32_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty,llvm_v16i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_v64i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v32i32_ty,llvm_v16i32_ty],
intr_properties>;
// tag : V6_vscattermhwq
-class Hexagon__v1024i1i32i32v64i32v32i32_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom__v128i1i32i32v64i32v32i32_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v64i32_ty,llvm_v32i32_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [], [llvm_v128i1_ty,llvm_i32_ty,llvm_i32_ty,llvm_v64i32_ty,llvm_v32i32_ty],
intr_properties>;
// tag : V6_vprefixqb
-class Hexagon_v16i32_v512i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v16i32_v64i1_Intrinsic<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty], [llvm_v64i1_ty],
intr_properties>;
// tag : V6_vprefixqb
-class Hexagon_v32i32_v1024i1_Intrinsic<string GCCIntSuffix,
+class Hexagon_custom_v32i32_v128i1_Intrinsic_128B<
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty],
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty], [llvm_v128i1_ty],
intr_properties>;
// V5 Scalar Instructions.
@@ -3779,28 +3779,28 @@ Hexagon_double_doubledoubledouble_Intrinsic<"HEXAGON_F2_dfmpyhh", [IntrNoMem, Th
// V60 HVX Instructions.
def int_hexagon_V6_vS32b_qpred_ai :
-Hexagon__v512i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai", [IntrWriteMem]>;
+Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>;
def int_hexagon_V6_vS32b_qpred_ai_128B :
-Hexagon__v1024i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>;
def int_hexagon_V6_vS32b_nqpred_ai :
-Hexagon__v512i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai", [IntrWriteMem]>;
+Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>;
def int_hexagon_V6_vS32b_nqpred_ai_128B :
-Hexagon__v1024i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>;
def int_hexagon_V6_vS32b_nt_qpred_ai :
-Hexagon__v512i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai", [IntrWriteMem]>;
+Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>;
def int_hexagon_V6_vS32b_nt_qpred_ai_128B :
-Hexagon__v1024i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>;
def int_hexagon_V6_vS32b_nt_nqpred_ai :
-Hexagon__v512i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai", [IntrWriteMem]>;
+Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>;
def int_hexagon_V6_vS32b_nt_nqpred_ai_128B :
-Hexagon__v1024i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>;
def int_hexagon_V6_valignb :
Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignb">;
@@ -4643,76 +4643,76 @@ def int_hexagon_V6_vd0_128B :
Hexagon_v32i32__Intrinsic<"HEXAGON_V6_vd0_128B">;
def int_hexagon_V6_vaddbq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vaddbq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vsubbq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vsubbq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vaddbnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vaddbnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vsubbnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vsubbnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vaddhq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vaddhq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vsubhq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vsubhq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vaddhnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vaddhnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vsubhnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vsubhnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vaddwq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vaddwq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vsubwq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vsubwq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vaddwnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vaddwnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vsubwnq :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwnq">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vsubwnq_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vabsh :
Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsh">;
@@ -5045,298 +5045,298 @@ def int_hexagon_V6_vnot_128B :
Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vnot_128B">;
def int_hexagon_V6_vandqrt :
-Hexagon_v16i32_v512i1i32_Intrinsic<"HEXAGON_V6_vandqrt">;
+Hexagon_custom_v16i32_v64i1i32_Intrinsic;
def int_hexagon_V6_vandqrt_128B :
-Hexagon_v32i32_v1024i1i32_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B;
def int_hexagon_V6_vandqrt_acc :
-Hexagon_v16i32_v16i32v512i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
+Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic;
def int_hexagon_V6_vandqrt_acc_128B :
-Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">;
+Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B;
def int_hexagon_V6_vandvrt :
-Hexagon_v512i1_v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt">;
+Hexagon_custom_v64i1_v16i32i32_Intrinsic;
def int_hexagon_V6_vandvrt_128B :
-Hexagon_v1024i1_v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_128B">;
+Hexagon_custom_v128i1_v32i32i32_Intrinsic_128B;
def int_hexagon_V6_vandvrt_acc :
-Hexagon_v512i1_v512i1v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc">;
+Hexagon_custom_v64i1_v64i1v16i32i32_Intrinsic;
def int_hexagon_V6_vandvrt_acc_128B :
-Hexagon_v1024i1_v1024i1v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">;
+Hexagon_custom_v128i1_v128i1v32i32i32_Intrinsic_128B;
def int_hexagon_V6_vgtw :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtw_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtw_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtw_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtw_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtw_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtw_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtw_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqw :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqw_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqw_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqw_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqw_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqw_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqw_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqw_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgth :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgth_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgth_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgth_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgth_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgth_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgth_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgth_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqh :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqh_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqh_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqh_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqh_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqh_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqh_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqh_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtb :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtb_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtb_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtb_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtb_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtb_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtb_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtb_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqb :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqb_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqb_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqb_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqb_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqb_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_veqb_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_veqb_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtuw :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtuw_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtuw_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtuw_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtuw_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtuw_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtuw_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtuw_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtuh :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtuh_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtuh_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtuh_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtuh_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtuh_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtuh_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtuh_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtub :
-Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub">;
+Hexagon_custom_v64i1_v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtub_128B :
-Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_128B">;
+Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtub_and :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_and">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtub_and_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_and_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtub_or :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_or">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtub_or_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_or_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vgtub_xor :
-Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_xor">;
+Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vgtub_xor_128B :
-Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">;
+Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_pred_or :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_or">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
def int_hexagon_V6_pred_or_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_or_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
def int_hexagon_V6_pred_and :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_and">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
def int_hexagon_V6_pred_and_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_and_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
def int_hexagon_V6_pred_not :
-Hexagon_v512i1_v512i1_Intrinsic<"HEXAGON_V6_pred_not">;
+Hexagon_custom_v64i1_v64i1_Intrinsic;
def int_hexagon_V6_pred_not_128B :
-Hexagon_v1024i1_v1024i1_Intrinsic<"HEXAGON_V6_pred_not_128B">;
+Hexagon_custom_v128i1_v128i1_Intrinsic_128B;
def int_hexagon_V6_pred_xor :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_xor">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
def int_hexagon_V6_pred_xor_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_xor_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
def int_hexagon_V6_pred_and_n :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_and_n">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
def int_hexagon_V6_pred_and_n_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_and_n_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
def int_hexagon_V6_pred_or_n :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_or_n">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
def int_hexagon_V6_pred_or_n_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_or_n_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
def int_hexagon_V6_pred_scalar2 :
-Hexagon_v512i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2">;
+Hexagon_custom_v64i1_i32_Intrinsic;
def int_hexagon_V6_pred_scalar2_128B :
-Hexagon_v1024i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">;
+Hexagon_custom_v128i1_i32_Intrinsic_128B;
def int_hexagon_V6_vmux :
-Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vmux">;
+Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vmux_128B :
-Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vmux_128B">;
+Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vswap :
-Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vswap">;
+Hexagon_custom_v32i32_v64i1v16i32v16i32_Intrinsic;
def int_hexagon_V6_vswap_128B :
-Hexagon_v64i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vswap_128B">;
+Hexagon_custom_v64i32_v128i1v32i32v32i32_Intrinsic_128B;
def int_hexagon_V6_vmaxub :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxub">;
@@ -5677,16 +5677,16 @@ def int_hexagon_V6_vsubbsat_dv_128B :
Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubbsat_dv_128B">;
def int_hexagon_V6_vaddcarry :
-Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic;
+Hexagon_custom_v16i32v64i1_v16i32v16i32v64i1_Intrinsic;
def int_hexagon_V6_vaddcarry_128B :
-Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B;
+Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B;
def int_hexagon_V6_vsubcarry :
-Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic;
+Hexagon_custom_v16i32v64i1_v16i32v16i32v64i1_Intrinsic;
def int_hexagon_V6_vsubcarry_128B :
-Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B;
+Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B;
def int_hexagon_V6_vaddububb_sat :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddububb_sat">;
@@ -5755,46 +5755,46 @@ def int_hexagon_V6_vmpyiwub_acc_128B :
Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub_acc_128B">;
def int_hexagon_V6_vandnqrt :
-Hexagon_v16i32_v512i1i32_Intrinsic<"HEXAGON_V6_vandnqrt">;
+Hexagon_custom_v16i32_v64i1i32_Intrinsic;
def int_hexagon_V6_vandnqrt_128B :
-Hexagon_v32i32_v1024i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_128B">;
+Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B;
def int_hexagon_V6_vandnqrt_acc :
-Hexagon_v16i32_v16i32v512i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc">;
+Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic;
def int_hexagon_V6_vandnqrt_acc_128B :
-Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc_128B">;
+Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B;
def int_hexagon_V6_vandvqv :
-Hexagon_v16i32_v512i1v16i32_Intrinsic<"HEXAGON_V6_vandvqv">;
+Hexagon_custom_v16i32_v64i1v16i32_Intrinsic;
def int_hexagon_V6_vandvqv_128B :
-Hexagon_v32i32_v1024i1v32i32_Intrinsic<"HEXAGON_V6_vandvqv_128B">;
+Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B;
def int_hexagon_V6_vandvnqv :
-Hexagon_v16i32_v512i1v16i32_Intrinsic<"HEXAGON_V6_vandvnqv">;
+Hexagon_custom_v16i32_v64i1v16i32_Intrinsic;
def int_hexagon_V6_vandvnqv_128B :
-Hexagon_v32i32_v1024i1v32i32_Intrinsic<"HEXAGON_V6_vandvnqv_128B">;
+Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B;
def int_hexagon_V6_pred_scalar2v2 :
-Hexagon_v512i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2">;
+Hexagon_custom_v64i1_i32_Intrinsic;
def int_hexagon_V6_pred_scalar2v2_128B :
-Hexagon_v1024i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2_128B">;
+Hexagon_custom_v128i1_i32_Intrinsic_128B;
def int_hexagon_V6_shuffeqw :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_shuffeqw">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
def int_hexagon_V6_shuffeqw_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_shuffeqw_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
def int_hexagon_V6_shuffeqh :
-Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_shuffeqh">;
+Hexagon_custom_v64i1_v64i1v64i1_Intrinsic;
def int_hexagon_V6_shuffeqh_128B :
-Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_shuffeqh_128B">;
+Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B;
def int_hexagon_V6_vmaxb :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxb">;
@@ -6027,22 +6027,22 @@ def int_hexagon_V6_vgathermhw_128B :
Hexagon__ptri32i32v64i32_Intrinsic<"HEXAGON_V6_vgathermhw_128B", [IntrArgMemOnly]>;
def int_hexagon_V6_vgathermwq :
-Hexagon__ptrv512i1i32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermwq", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<[IntrArgMemOnly]>;
def int_hexagon_V6_vgathermwq_128B :
-Hexagon__ptrv1024i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermwq_128B", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<[IntrArgMemOnly]>;
def int_hexagon_V6_vgathermhq :
-Hexagon__ptrv512i1i32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermhq", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<[IntrArgMemOnly]>;
def int_hexagon_V6_vgathermhq_128B :
-Hexagon__ptrv1024i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhq_128B", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<[IntrArgMemOnly]>;
def int_hexagon_V6_vgathermhwq :
-Hexagon__ptrv512i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhwq", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv64i1i32i32v32i32_Intrinsic<[IntrArgMemOnly]>;
def int_hexagon_V6_vgathermhwq_128B :
-Hexagon__ptrv1024i1i32i32v64i32_Intrinsic<"HEXAGON_V6_vgathermhwq_128B", [IntrArgMemOnly]>;
+Hexagon_custom__ptrv128i1i32i32v64i32_Intrinsic_128B<[IntrArgMemOnly]>;
def int_hexagon_V6_vscattermw :
Hexagon__i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermw", [IntrWriteMem]>;
@@ -6069,16 +6069,16 @@ def int_hexagon_V6_vscattermh_add_128B :
Hexagon__i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermh_add_128B", [IntrWriteMem]>;
def int_hexagon_V6_vscattermwq :
-Hexagon__v512i1i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermwq", [IntrWriteMem]>;
+Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<[IntrWriteMem]>;
def int_hexagon_V6_vscattermwq_128B :
-Hexagon__v1024i1i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermwq_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<[IntrWriteMem]>;
def int_hexagon_V6_vscattermhq :
-Hexagon__v512i1i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhq", [IntrWriteMem]>;
+Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<[IntrWriteMem]>;
def int_hexagon_V6_vscattermhq_128B :
-Hexagon__v1024i1i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhq_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<[IntrWriteMem]>;
def int_hexagon_V6_vscattermhw :
Hexagon__i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhw", [IntrWriteMem]>;
@@ -6087,10 +6087,10 @@ def int_hexagon_V6_vscattermhw_128B :
Hexagon__i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhw_128B", [IntrWriteMem]>;
def int_hexagon_V6_vscattermhwq :
-Hexagon__v512i1i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhwq", [IntrWriteMem]>;
+Hexagon_custom__v64i1i32i32v32i32v16i32_Intrinsic<[IntrWriteMem]>;
def int_hexagon_V6_vscattermhwq_128B :
-Hexagon__v1024i1i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhwq_128B", [IntrWriteMem]>;
+Hexagon_custom__v128i1i32i32v64i32v32i32_Intrinsic_128B<[IntrWriteMem]>;
def int_hexagon_V6_vscattermhw_add :
Hexagon__i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhw_add", [IntrWriteMem]>;
@@ -6099,22 +6099,22 @@ def int_hexagon_V6_vscattermhw_add_128B :
Hexagon__i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B", [IntrWriteMem]>;
def int_hexagon_V6_vprefixqb :
-Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqb">;
+Hexagon_custom_v16i32_v64i1_Intrinsic;
def int_hexagon_V6_vprefixqb_128B :
-Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqb_128B">;
+Hexagon_custom_v32i32_v128i1_Intrinsic_128B;
def int_hexagon_V6_vprefixqh :
-Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqh">;
+Hexagon_custom_v16i32_v64i1_Intrinsic;
def int_hexagon_V6_vprefixqh_128B :
-Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqh_128B">;
+Hexagon_custom_v32i32_v128i1_Intrinsic_128B;
def int_hexagon_V6_vprefixqw :
-Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqw">;
+Hexagon_custom_v16i32_v64i1_Intrinsic;
def int_hexagon_V6_vprefixqw_128B :
-Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqw_128B">;
+Hexagon_custom_v32i32_v128i1_Intrinsic_128B;
// V66 HVX Instructions.
@@ -6131,10 +6131,10 @@ def int_hexagon_V6_vasr_into_128B :
Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vasr_into_128B">;
def int_hexagon_V6_vaddcarrysat :
-Hexagon_v16i32_v16i32v16i32v512i1_Intrinsic<"HEXAGON_V6_vaddcarrysat">;
+Hexagon_custom_v16i32_v16i32v16i32v64i1_Intrinsic;
def int_hexagon_V6_vaddcarrysat_128B :
-Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<"HEXAGON_V6_vaddcarrysat_128B">;
+Hexagon_custom_v32i32_v32i32v32i32v128i1_Intrinsic_128B;
def int_hexagon_V6_vsatdw :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatdw">;
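
For reference, the retyped definitions in this file correspond to IR calls of the following shape. This is a minimal sketch for 64-byte mode; the function name @select_gt and the particular choice of vgtw/vmux are illustrative only, with signatures taken from the defs above and from the tests further down:

declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>)

; Compare two HVX vectors and select per lane; the predicate is now <64 x i1>.
define <16 x i32> @select_gt(<16 x i32> %a, <16 x i32> %b) #0 {
  %q = call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
  %r = call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %q, <16 x i32> %a, <16 x i32> %b)
  ret <16 x i32> %r
}

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
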
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 49bc133545ee..c0f92042e5da 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1199,7 +1199,7 @@ OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results) {
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
MVT ByteTy = getSingleVT(MVT::i8);
- MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
const SDLoc &dl(Results.InpNode);
SDValue B = getVectorConstant(Bytes, dl);
Results.push(Hexagon::V6_vd0, ByteTy, {});
@@ -2203,28 +2203,28 @@ void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
case Intrinsic::hexagon_V6_vaddcarry: {
std::array<SDValue, 3> Ops = {
{N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
- SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1);
+ SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vaddcarry_128B: {
std::array<SDValue, 3> Ops = {
{N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
- SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1);
+ SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vsubcarry: {
std::array<SDValue, 3> Ops = {
{N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
- SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1);
+ SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vsubcarry_128B: {
std::array<SDValue, 3> Ops = {
{N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
- SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1);
+ SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
break;
}
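
The dual-output carry intrinsics selected above now produce a <64 x i1> (or <128 x i1>) carry-out next to the vector result. A minimal 64-byte sketch, assuming the usual two-result struct form and a hypothetical function name:

declare { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32>, <16 x i32>, <64 x i1>)

define <16 x i32> @add_with_carry(<16 x i32> %a, <16 x i32> %b, <64 x i1> %cin) #0 {
  ; Result 0 is the word-wise sum, result 1 is the carry-out predicate.
  %res = call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a, <16 x i32> %b, <64 x i1> %cin)
  %sum = extractvalue { <16 x i32>, <64 x i1> } %res, 0
  ret <16 x i32> %sum
}

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
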
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 0a23b50986fa..284c6b204c3a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1080,42 +1080,24 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-static Constant *convert_i1_to_i8(const Constant *ConstVal) {
- SmallVector<Constant *, 128> NewConst;
- const ConstantVector *CV = dyn_cast<ConstantVector>(ConstVal);
- if (!CV)
- return nullptr;
-
- LLVMContext &Ctx = ConstVal->getContext();
- IRBuilder<> IRB(Ctx);
- unsigned NumVectorElements = CV->getNumOperands();
- assert(isPowerOf2_32(NumVectorElements) &&
- "conversion only supported for pow2 VectorSize!");
-
- for (unsigned i = 0; i < NumVectorElements / 8; ++i) {
- uint8_t x = 0;
- for (unsigned j = 0; j < 8; ++j) {
- uint8_t y = CV->getOperand(i * 8 + j)->getUniqueInteger().getZExtValue();
- x |= y << (7 - j);
- }
- assert((x == 0 || x == 255) && "Either all 0's or all 1's expected!");
- NewConst.push_back(IRB.getInt8(x));
- }
- return ConstantVector::get(NewConst);
-}
-
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
Constant *CVal = nullptr;
bool isVTi1Type = false;
- if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
- Type *CValTy = ConstVal->getType();
- if (CValTy->isVectorTy() &&
- CValTy->getVectorElementType()->isIntegerTy(1)) {
- CVal = convert_i1_to_i8(ConstVal);
- isVTi1Type = (CVal != nullptr);
+ if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
+ if (CV->getType()->getVectorElementType()->isIntegerTy(1)) {
+ IRBuilder<> IRB(CV->getContext());
+ SmallVector<Constant*, 128> NewConst;
+ unsigned VecLen = CV->getNumOperands();
+ assert(isPowerOf2_32(VecLen) &&
+ "conversion only supported for pow2 VectorSize");
+ for (unsigned i = 0; i < VecLen; ++i)
+ NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
+
+ CVal = ConstantVector::get(NewConst);
+ isVTi1Type = true;
}
}
unsigned Align = CPN->getAlignment();
@@ -3225,8 +3207,8 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
switch (VT.getSizeInBits()) {
default:
return {0u, nullptr};
- case 512:
- case 1024:
+ case 64:
+ case 128:
return {0u, &Hexagon::HvxQRRegClass};
}
break;
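
With the constraint handling above keyed to the 64- and 128-bit predicate sizes, a "q"-constrained inline-asm operand is typed as <64 x i1> (or <128 x i1>) rather than being bitcast from a vector. A sketch adapted from the early-if-vecpred.ll test further down, using a hypothetical function name and 64-byte mode:

define void @store_if(<64 x i1> %q, <16 x i32>* %p, <16 x i32> %v) #0 {
  ; The predicate feeds the "q" operand directly, with no bitcast through <16 x i32>.
  call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<64 x i1> %q, <16 x i32>* %p, <16 x i32> %v)
  ret void
}

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
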
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 204950f9010e..b18afb209240 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -39,7 +39,6 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass);
} else if (Subtarget.useHVX128BOps()) {
addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
@@ -50,7 +49,6 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass);
}
// Set up operation actions.
diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 8ae55b207188..4f0e7e8ed2cc 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -277,76 +277,6 @@ def : Pat <(v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
Requires<[UseHVX]>;
}
-def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
- (v512i1 (V6_vandvrt (v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
- (v512i1 (V6_vandvrt (v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
- (v512i1 (V6_vandvrt (v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
- (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
- (v32i16 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
- (v64i8 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
- (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
- (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
- (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-let AddedComplexity = 140 in {
-def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
- (V6_vS32b_ai IntRegs:$addr, 0,
- (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
- (v512i1 (V6_vandvrt
- (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
- (V6_vS32b_ai IntRegs:$addr, 0,
- (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
- (v1024i1 (V6_vandvrt
- (v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-}
-
def: Pat<(v64i16 (trunc v64i32:$Vdd)),
(v64i16 (V6_vpackwh_sat
(v32i32 (V6_hi HvxWR:$Vdd)),
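
The predicate bitconvert and load/store selection patterns removed here survive, retyped, in HexagonIntrinsicsV60.td below. At the IR level the effect is that a predicate reaches memory through a vector register and an explicit conversion, roughly as in this 64-byte sketch (hypothetical function name):

declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)

define void @save_pred(<64 x i1> %q, <16 x i32>* %p) #0 {
  ; Expand the predicate into a vector register, then store the vector.
  %v = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
  store <16 x i32> %v, <16 x i32>* %p, align 64
  ret void
}

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
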
diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index a60c80beb5d6..1245ee7974b5 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -25,59 +25,59 @@ def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
(v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_hi)) >;
}
-def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
- (v512i1 (V6_vandvrt(v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v16i32 HvxVR:$src1))),
+ (v64i1 (V6_vandvrt(v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
- (v512i1 (V6_vandvrt(v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v32i16 HvxVR:$src1))),
+ (v64i1 (V6_vandvrt(v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
- (v512i1 (V6_vandvrt(v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v64i8 HvxVR:$src1))),
+ (v64i1 (V6_vandvrt(v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
- (v16i32 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v16i32 (bitconvert (v64i1 HvxQR:$src1))),
+ (v16i32 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
- (v32i16 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v32i16 (bitconvert (v64i1 HvxQR:$src1))),
+ (v32i16 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
- (v64i8 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i8 (bitconvert (v64i1 HvxQR:$src1))),
+ (v64i8 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v32i32 HvxVR:$src1))),
+ (v128i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v64i16 HvxVR:$src1))),
+ (v128i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v128i8 HvxVR:$src1))),
+ (v128i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
- (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v32i32 (bitconvert (v128i1 HvxQR:$src1))),
+ (v32i32 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
- (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i16 (bitconvert (v128i1 HvxQR:$src1))),
+ (v64i16 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
- (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i8 (bitconvert (v128i1 HvxQR:$src1))),
+ (v128i8 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
let AddedComplexity = 140 in {
-def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+def : Pat <(store (v64i1 HvxQR:$src1), (i32 IntRegs:$addr)),
(V6_vS32b_ai IntRegs:$addr, 0,
- (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1),
+ (v16i32 (V6_vandqrt (v64i1 HvxQR:$src1),
(A2_tfrsi 0x01010101))))>;
-def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
- (v512i1 (V6_vandvrt
+def : Pat <(v64i1 (load (i32 IntRegs:$addr))),
+ (v64i1 (V6_vandvrt
(v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>;
-def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+def : Pat <(store (v128i1 HvxQR:$src1), (i32 IntRegs:$addr)),
(V6_vS32b_ai IntRegs:$addr, 0,
- (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1),
+ (v32i32 (V6_vandqrt (v128i1 HvxQR:$src1),
(A2_tfrsi 0x01010101))))>;
-def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
- (v1024i1 (V6_vandvrt
+def : Pat <(v128i1 (load (i32 IntRegs:$addr))),
+ (v128i1 (V6_vandvrt
(v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>;
}
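
The retyped load pattern above has the same shape in the other direction: the vector image is reloaded and converted back to a predicate. A matching 64-byte sketch (hypothetical function name):

declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)

define <64 x i1> @reload_pred(<16 x i32>* %p) #0 {
  ; Load the vector image of the predicate, then convert it back to <64 x i1>.
  %v = load <16 x i32>, <16 x i32>* %p, align 64
  %q = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)
  ret <64 x i1> %q
}

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
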
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index ea39dc44d15b..49428db223a1 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -319,7 +319,7 @@ let Namespace = "Hexagon" in {
// HVX types
def VecI1: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v512i1, v1024i1, v512i1]>;
+ [v64i1, v128i1, v64i1]>;
def VecI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v64i8, v128i8, v64i8]>;
def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
@@ -355,10 +355,10 @@ def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024,
[RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>;
}
-def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512,
+def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 128,
(add Q0, Q1, Q2, Q3)> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
- [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
+ [RegInfo<64,512,512>, RegInfo<128,1024,1024>, RegInfo<64,512,512>]>;
}
def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048,
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 2c6d489f53e4..c9f04651cf70 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -286,9 +286,6 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
ArrayRef<MVT> ElemTypes = getHVXElementTypes();
if (IncludeBool && ElemTy == MVT::i1) {
- // Special case for the v512i1, etc.
- if (8*HwLen == NumElems)
- return true;
// Boolean HVX vector types are formed from regular HVX vector types
// by replacing the element type with i1.
for (MVT T : ElemTypes)
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll
index 0fc8ba4bf1dc..08dd342d0632 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/bitwise-pred-128b.ll
@@ -10,7 +10,7 @@ define <128 x i8> @t00(<128 x i8> %a0, <128 x i8> %a1) #0 {
ret <128 x i8> %v0
}
-declare <1024 x i1> @llvm.hexagon.vandvrt.128B(<128 x i8>, i32)
+declare <128 x i1> @llvm.hexagon.vandvrt.128B(<128 x i8>, i32)
; CHECK-LABEL: t01
; CHECK: vor(v{{[0-9:]+}},v{{[0-9:]+}})
diff --git a/llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll b/llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll
index e9fd9a0977dc..339cc3887300 100644
--- a/llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll
+++ b/llvm/test/CodeGen/Hexagon/bug-aa4463-ifconv-vecpred.ll
@@ -3,40 +3,34 @@
define inreg <16 x i32> @f0(i32 %a0, <16 x i32>* nocapture %a1) #0 {
b0:
- %v0 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a0)
- %v1 = tail call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %v0)
+ %v0 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a0)
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %v0)
%v2 = icmp ult i32 %a0, 48
br i1 %v2, label %b1, label %b2
b1: ; preds = %b0
%v3 = add nuw nsw i32 %a0, 16
- %v4 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v3)
- %v5 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v4, <512 x i1> %v1)
+ %v4 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v3)
+ %v5 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v4, <64 x i1> %v1)
br label %b2
b2: ; preds = %b1, %b0
- %v6 = phi <512 x i1> [ %v5, %b1 ], [ %v1, %b0 ]
- %v7 = bitcast <512 x i1> %v6 to <16 x i32>
+ %v6 = phi <64 x i1> [ %v5, %b1 ], [ %v1, %b0 ]
+ %v7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v6, i32 -1)
%v8 = getelementptr inbounds <16 x i32>, <16 x i32>* %a1, i32 1
%v9 = load <16 x i32>, <16 x i32>* %v8, align 64
%v10 = getelementptr inbounds <16 x i32>, <16 x i32>* %a1, i32 2
%v11 = load <16 x i32>, <16 x i32>* %v10, align 64
- %v12 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v6, <16 x i32> %v9, <16 x i32> %v11)
+ %v12 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v6, <16 x i32> %v9, <16 x i32> %v11)
store <16 x i32> %v12, <16 x i32>* %a1, align 64
ret <16 x i32> %v7
}
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #1
-
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
-
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
-
-; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll b/llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll
index 4c266a68c245..9246b026b664 100644
--- a/llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll
+++ b/llvm/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll
@@ -6,12 +6,12 @@ define void @convert_const_i1_to_i8(<32 x i32>* %a0) #0 {
entry:
%v0 = load <32 x i32>, <32 x i32>* %a0, align 128
%v1 = tail call <32 x i32> @llvm.hexagon.V6.vrdelta.128B(<32 x i32> %v0, <32 x i32> undef)
- %v2 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 
false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 
false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i32> undef, <32 x i32> %v1)
+ %v2 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>, <32 x i32> undef, <32 x i32> %v1)
store <32 x i32> %v2, <32 x i32>* %a0, align 128
ret void
}
declare <32 x i32> @llvm.hexagon.V6.vrdelta.128B(<32 x i32>, <32 x i32>)
-declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>)
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>)
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
diff --git a/llvm/test/CodeGen/Hexagon/early-if-vecpred.ll b/llvm/test/CodeGen/Hexagon/early-if-vecpred.ll
index 05074338cffb..372e96dbff83 100644
--- a/llvm/test/CodeGen/Hexagon/early-if-vecpred.ll
+++ b/llvm/test/CodeGen/Hexagon/early-if-vecpred.ll
@@ -14,22 +14,21 @@ target triple = "hexagon"
; CHECK: if (q{{[0-3]}}) vmem
define void @fred(i32 %a0) #0 {
b1:
- %v2 = tail call <1024 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32 %a0) #2
+ %v2 = tail call <128 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32 %a0) #2
br i1 undef, label %b3, label %b5
b3: ; preds = %b1
- %v4 = tail call <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1> %v2) #2
+ %v4 = tail call <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1> %v2) #2
br label %b5
b5: ; preds = %b3, %b1
- %v6 = phi <1024 x i1> [ %v4, %b3 ], [ %v2, %b1 ]
- %v7 = bitcast <1024 x i1> %v6 to <32 x i32>
- tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<32 x i32> %v7, <32 x i32>* undef, <32 x i32> undef) #2
+ %v6 = phi <128 x i1> [ %v4, %b3 ], [ %v2, %b1 ]
+ tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<128 x i1> %v6, <32 x i32>* undef, <32 x i32> undef) #2
ret void
}
-declare <1024 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32) #1
-declare <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1>) #1
+declare <128 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32) #1
+declare <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1>) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll b/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
index 4c93ab201e3b..7cc92736fda4 100644
--- a/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
+++ b/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
@@ -47,61 +47,61 @@ for.body:
%18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128
%arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14
%19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128
- %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
- %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2)
- %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11)
- %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3)
- %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef)
- %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
- %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7)
- %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15)
- %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8)
- %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16)
- %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9)
- %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17)
- %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
- %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4)
- %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13)
- %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5)
- %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef)
- %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6)
- %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14)
- %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
- %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
- %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
- %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
- %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
- %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
- %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
- %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21)
- %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26)
- %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23)
- %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28)
- %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24)
- %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30)
- %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
- %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22)
- %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27)
- %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef)
- %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31)
- %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
- %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33)
- %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39)
- %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35)
- %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37)
- %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
- %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36)
- %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44)
- %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
- %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46)
- %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48)
- %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50)
- %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
- %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
- %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
- %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55)
- %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
- %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64)
+ %20 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
+ %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %11, <32 x i32> %2)
+ %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %2, <32 x i32> %11)
+ %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> undef, <32 x i32> %3)
+ %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %12, <32 x i32> undef)
+ %25 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
+ %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %15, <32 x i32> %7)
+ %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %7, <32 x i32> %15)
+ %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %16, <32 x i32> %8)
+ %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %8, <32 x i32> %16)
+ %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %17, <32 x i32> %9)
+ %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %9, <32 x i32> %17)
+ %32 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
+ %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %13, <32 x i32> %4)
+ %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %4, <32 x i32> %13)
+ %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> undef, <32 x i32> %5)
+ %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %5, <32 x i32> undef)
+ %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %14, <32 x i32> %6)
+ %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %6, <32 x i32> %14)
+ %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
+ %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
+ %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
+ %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
+ %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
+ %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
+ %45 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
+ %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %26, <32 x i32> %21)
+ %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %21, <32 x i32> %26)
+ %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %28, <32 x i32> %23)
+ %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %23, <32 x i32> %28)
+ %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %30, <32 x i32> %24)
+ %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %24, <32 x i32> %30)
+ %52 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
+ %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %27, <32 x i32> %22)
+ %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %22, <32 x i32> %27)
+ %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %29, <32 x i32> undef)
+ %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> undef, <32 x i32> %31)
+ %57 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
+ %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %39, <32 x i32> %33)
+ %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %33, <32 x i32> %39)
+ %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %41, <32 x i32> %35)
+ %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %43, <32 x i32> %37)
+ %62 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
+ %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %62, <32 x i32> %42, <32 x i32> %36)
+ %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %62, <32 x i32> %38, <32 x i32> %44)
+ %65 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
+ %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %58, <32 x i32> %46)
+ %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %60, <32 x i32> %48)
+ %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %61, <32 x i32> %50)
+ %69 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
+ %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
+ %71 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
+ %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %71, <32 x i32> %63, <32 x i32> %55)
+ %73 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
+ %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %73, <32 x i32> %56, <32 x i32> %64)
%75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67)
%76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef)
%77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72)
@@ -129,9 +129,9 @@ for.end:
ret void
}
-declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
-declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll b/llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
index c7d348ad3d38..e54ca1ea3435 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
@@ -7,51 +7,52 @@
define void @f0(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
b0:
- %v0 = bitcast <32 x i32> %a0 to <1024 x i1>
- tail call void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
+ %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+ tail call void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
; CHECK-LABEL: f1:
; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0) = v{{[0-9]+}}
define void @f1(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
b0:
- %v0 = bitcast <32 x i32> %a0 to <1024 x i1>
- tail call void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
+ %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+ tail call void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
; CHECK-LABEL: f2:
; CHECK: if (q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
define void @f2(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
b0:
- %v0 = bitcast <32 x i32> %a0 to <1024 x i1>
- tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
+ %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+ tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
; CHECK-LABEL: f3:
; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
define void @f3(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
b0:
- %v0 = bitcast <32 x i32> %a0 to <1024 x i1>
- tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
+ %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+ tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
ret void
}
-; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #1
attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/hvx-byte-store.ll b/llvm/test/CodeGen/Hexagon/hvx-byte-store.ll
index 27c509e49190..78c5a1161ca8 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-byte-store.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-byte-store.ll
@@ -7,51 +7,52 @@
define void @f0(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
b0:
- %v0 = bitcast <16 x i32> %a0 to <512 x i1>
- tail call void @llvm.hexagon.V6.vS32b.qpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
+ %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
+ tail call void @llvm.hexagon.V6.vS32b.qpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.qpred.ai(<512 x i1>, i8*, <16 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.qpred.ai(<64 x i1>, i8*, <16 x i32>) #0
; CHECK-LABEL: f1:
; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0) = v{{[0-9]+}}
define void @f1(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
b0:
- %v0 = bitcast <16 x i32> %a0 to <512 x i1>
- tail call void @llvm.hexagon.V6.vS32b.nqpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
+ %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
+ tail call void @llvm.hexagon.V6.vS32b.nqpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nqpred.ai(<512 x i1>, i8*, <16 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nqpred.ai(<64 x i1>, i8*, <16 x i32>) #0
; CHECK-LABEL: f2:
; CHECK: if (q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
define void @f2(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
b0:
- %v0 = bitcast <16 x i32> %a0 to <512 x i1>
- tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
+ %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
+ tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<512 x i1>, i8*, <16 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<64 x i1>, i8*, <16 x i32>) #0
; CHECK-LABEL: f3:
; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
define void @f3(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
b0:
- %v0 = bitcast <16 x i32> %a0 to <512 x i1>
- tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
+ %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
+ tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
ret void
}
-; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<512 x i1>, i8*, <16 x i32>) #0
+declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<64 x i1>, i8*, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll b/llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll
index f22ad09bae8f..b047e3801345 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-dbl-dual-output.ll
@@ -6,29 +6,34 @@
; CHECK: v{{[0-9]+}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
define inreg <32 x i32> @f0(<32 x i32> %a0, <32 x i32> %a1, i8* nocapture readonly %a2) #0 {
b0:
- %v0 = bitcast i8* %a2 to <1024 x i1>*
- %v1 = load <1024 x i1>, <1024 x i1>* %v0, align 128
- %v2 = tail call { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <1024 x i1> %v1)
- %v3 = extractvalue { <32 x i32>, <1024 x i1> } %v2, 0
- ret <32 x i32> %v3
+ %v0 = bitcast i8* %a2 to <32 x i32>*
+ %v1 = load <32 x i32>, <32 x i32>* %v0, align 128
+ %v2 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %v1, i32 -1)
+ %v3 = tail call { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <128 x i1> %v2)
+ %v4 = extractvalue { <32 x i32>, <128 x i1> } %v3, 0
+ ret <32 x i32> %v4
}
; CHECK-LABEL: f1:
; CHECK: v{{[0-9]+}}.w = vsub(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
define inreg <32 x i32> @f1(<32 x i32> %a0, <32 x i32> %a1, i8* nocapture readonly %a2) #0 {
b0:
- %v0 = bitcast i8* %a2 to <1024 x i1>*
- %v1 = load <1024 x i1>, <1024 x i1>* %v0, align 128
- %v2 = tail call { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <1024 x i1> %v1)
- %v3 = extractvalue { <32 x i32>, <1024 x i1> } %v2, 0
- ret <32 x i32> %v3
+ %v0 = bitcast i8* %a2 to <32 x i32>*
+ %v1 = load <32 x i32>, <32 x i32>* %v0, align 128
+ %v2 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %v1, i32 -1)
+ %v3 = tail call { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <128 x i1> %v2)
+ %v4 = extractvalue { <32 x i32>, <128 x i1> } %v3, 0
+ ret <32 x i32> %v4
}
; Function Attrs: nounwind readnone
-declare { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32>, <32 x i32>, <1024 x i1>) #1
+declare { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32>, <32 x i32>, <128 x i1>) #1
; Function Attrs: nounwind readnone
-declare { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32>, <32 x i32>, <1024 x i1>) #1
+declare { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32>, <32 x i32>, <128 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv65" "target-features"="+hvxv65,+hvx-length128b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/hvx-dual-output.ll b/llvm/test/CodeGen/Hexagon/hvx-dual-output.ll
index f4d3e59fa95d..cb859aa809e7 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-dual-output.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-dual-output.ll
@@ -6,29 +6,34 @@
; CHECK: v{{[0-9]+}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
define inreg <16 x i32> @f0(<16 x i32> %a0, <16 x i32> %a1, i8* nocapture readonly %a2) #0 {
b0:
- %v0 = bitcast i8* %a2 to <512 x i1>*
- %v1 = load <512 x i1>, <512 x i1>* %v0, align 64
- %v2 = tail call { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a0, <16 x i32> %a1, <512 x i1> %v1)
- %v3 = extractvalue { <16 x i32>, <512 x i1> } %v2, 0
- ret <16 x i32> %v3
+ %v0 = bitcast i8* %a2 to <16 x i32>*
+ %v1 = load <16 x i32>, <16 x i32>* %v0, align 64
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v1, i32 -1)
+ %v3 = tail call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a0, <16 x i32> %a1, <64 x i1> %v2)
+ %v4 = extractvalue { <16 x i32>, <64 x i1> } %v3, 0
+ ret <16 x i32> %v4
}
; CHECK-LABEL: f1:
; CHECK: v{{[0-9]+}}.w = vsub(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
define inreg <16 x i32> @f1(<16 x i32> %a0, <16 x i32> %a1, i8* nocapture readonly %a2) #0 {
b0:
- %v0 = bitcast i8* %a2 to <512 x i1>*
- %v1 = load <512 x i1>, <512 x i1>* %v0, align 64
- %v2 = tail call { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32> %a0, <16 x i32> %a1, <512 x i1> %v1)
- %v3 = extractvalue { <16 x i32>, <512 x i1> } %v2, 0
- ret <16 x i32> %v3
+ %v0 = bitcast i8* %a2 to <16 x i32>*
+ %v1 = load <16 x i32>, <16 x i32>* %v0, align 64
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v1, i32 -1)
+ %v3 = tail call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32> %a0, <16 x i32> %a1, <64 x i1> %v2)
+ %v4 = extractvalue { <16 x i32>, <64 x i1> } %v3, 0
+ ret <16 x i32> %v4
}
; Function Attrs: nounwind readnone
-declare { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32>, <16 x i32>, <512 x i1>) #1
+declare { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32>, <16 x i32>, <64 x i1>) #1
; Function Attrs: nounwind readnone
-declare { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32>, <16 x i32>, <512 x i1>) #1
+declare { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32>, <16 x i32>, <64 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv65" "target-features"="+hvxv65,+hvx-length64b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/inline-asm-qv.ll b/llvm/test/CodeGen/Hexagon/inline-asm-qv.ll
index 26f4ac0bd038..5e9d2239edd6 100644
--- a/llvm/test/CodeGen/Hexagon/inline-asm-qv.ll
+++ b/llvm/test/CodeGen/Hexagon/inline-asm-qv.ll
@@ -10,10 +10,13 @@ target triple = "hexagon"
; Function Attrs: nounwind
define void @foo(<16 x i32> %v0, <16 x i32> %v1, <16 x i32>* nocapture %p) #0 {
entry:
- %0 = tail call <16 x i32> asm "$0 = vgtw($1.w,$2.w)", "=q,v,v"(<16 x i32> %v0, <16 x i32> %v1) #1
- store <16 x i32> %0, <16 x i32>* %p, align 64
+ %0 = tail call <64 x i1> asm "$0 = vgtw($1.w,$2.w)", "=q,v,v"(<16 x i32> %v0, <16 x i32> %v1) #1
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1) #1
+ store <16 x i32> %1, <16 x i32>* %p, align 64
ret void
}
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll b/llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll
index 7d2f50ed58a4..89ab13ada40b 100644
--- a/llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll
+++ b/llvm/test/CodeGen/Hexagon/inline-asm-vecpred128.ll
@@ -8,7 +8,7 @@ target triple = "hexagon"
; CHECK-LABEL: fred
; CHECK: if (q{{[0-3]}}) vmem
define void @fred() #0 {
- tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<32 x i32> undef, <32 x i32>* undef, <32 x i32> undef) #0
+ tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<128 x i1> undef, <32 x i32>* undef, <32 x i32> undef) #0
ret void
}
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll b/llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll
index cdb6b6fa80a4..ca026ded3f91 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics-v60-alu.ll
@@ -668,8 +668,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.b += v{{[0-9]+}}.b
define <16 x i32> @test84(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -677,8 +677,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.h += v{{[0-9]+}}.h
define <16 x i32> @test85(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -686,8 +686,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.w += v{{[0-9]+}}.w
define <16 x i32> @test86(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -695,8 +695,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.b += v{{[0-9]+}}.b
define <16 x i32> @test87(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -704,8 +704,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.h += v{{[0-9]+}}.h
define <16 x i32> @test88(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -713,8 +713,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.w += v{{[0-9]+}}.w
define <16 x i32> @test89(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -722,8 +722,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.b -= v{{[0-9]+}}.b
define <16 x i32> @test90(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -731,8 +731,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.h -= v{{[0-9]+}}.h
define <16 x i32> @test91(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -740,8 +740,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.w -= v{{[0-9]+}}.w
define <16 x i32> @test92(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -749,8 +749,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.b -= v{{[0-9]+}}.b
define <16 x i32> @test93(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -758,8 +758,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.h -= v{{[0-9]+}}.h
define <16 x i32> @test94(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -767,8 +767,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.w -= v{{[0-9]+}}.w
define <16 x i32> @test95(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@@ -999,18 +999,18 @@ declare <16 x i32> @llvm.hexagon.V6.vxor(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddubsat(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsh.sat(<16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32>) #0
@@ -1029,6 +1029,7 @@ declare <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vsb(<16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vsh(<16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll b/llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
index 2281f46b8518..62d2ec177303 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics-v60-misc.ll
@@ -181,8 +181,8 @@ entry:
; CHECK: v{{[0-9]+}} = vmux(q{{[0-3]+}},v{{[0-9]+}},v{{[0-9]+}})
define void @test20(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %0, <16 x i32> %b, <16 x i32> %c)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %0, <16 x i32> %b, <16 x i32> %c)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -191,10 +191,11 @@ entry:
; CHECK: q{{[0-3]+}} = and(q{{[0-3]+}},q{{[0-3]+}})
define void @test21(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = bitcast <16 x i32> %b to <512 x i1>
- %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %0, <512 x i1> %1)
- store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %0, <64 x i1> %1)
+ %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+ store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@@ -202,10 +203,11 @@ entry:
; CHECK: q{{[0-3]+}} = or(q{{[0-3]+}},q{{[0-3]+}})
define void @test22(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = bitcast <16 x i32> %b to <512 x i1>
- %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %0, <512 x i1> %1)
- store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %0, <64 x i1> %1)
+ %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+ store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@@ -213,9 +215,10 @@ entry:
; CHECK: q{{[0-3]+}} = not(q{{[0-3]+}})
define void @test23(<16 x i32> %a) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %0)
- store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %0)
+ %2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %1, i32 -1)
+ store <16 x i32> %2, <16 x i32>* @h, align 64
ret void
}
@@ -223,10 +226,11 @@ entry:
; CHECK: q{{[0-3]+}} = xor(q{{[0-3]+}},q{{[0-3]+}})
define void @test24(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = bitcast <16 x i32> %b to <512 x i1>
- %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1> %0, <512 x i1> %1)
- store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1> %0, <64 x i1> %1)
+ %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+ store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@@ -234,10 +238,11 @@ entry:
; CHECK: q{{[0-3]+}} = or(q{{[0-3]+}},!q{{[0-3]+}})
define void @test25(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = bitcast <16 x i32> %b to <512 x i1>
- %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1> %0, <512 x i1> %1)
- store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1> %0, <64 x i1> %1)
+ %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+ store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@@ -245,10 +250,11 @@ entry:
; CHECK: q{{[0-3]+}} = and(q{{[0-3]+}},!q{{[0-3]+}})
define void @test26(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = bitcast <16 x i32> %b to <512 x i1>
- %2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %0, <512 x i1> %1)
- store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ %2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %0, <64 x i1> %1)
+ %3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
+ store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@@ -256,8 +262,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test27(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
- store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+ store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -265,8 +272,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test28(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = tail call <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %a, <16 x i32> %b)
- store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %a, <16 x i32> %b)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+ store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -274,8 +282,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test29(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = tail call <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %a, <16 x i32> %b)
- store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %a, <16 x i32> %b)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+ store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -283,8 +292,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test30(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
- store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+ store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -292,8 +302,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test31(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = tail call <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %a, <16 x i32> %b)
- store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %a, <16 x i32> %b)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+ store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -301,8 +312,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test32(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %a, <16 x i32> %b)
- store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %a, <16 x i32> %b)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+ store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -310,8 +322,8 @@ entry:
; CHECK: v{{[0-9]+}} |= vand(q{{[0-3]+}},r{{[0-9]+}})
define void @test33(<16 x i32> %a, <16 x i32> %b, i32 %c) #0 {
entry:
- %0 = bitcast <16 x i32> %b to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %a, <512 x i1> %0, i32 %c)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %a, <64 x i1> %0, i32 %c)
store <16 x i32> %1, <16 x i32>* @h, align 64
ret void
}
@@ -320,9 +332,10 @@ entry:
; CHECK: q{{[0-3]+}} |= vand(v{{[0-9]+}},r{{[0-9]+}})
define void @test34(<16 x i32> %a, <16 x i32> %b, i32 %c) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %0, <16 x i32> %b, i32 %c)
- store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %0, <16 x i32> %b, i32 %c)
+ %2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %1, i32 -1)
+ store <16 x i32> %2, <16 x i32>* @k, align 64
ret void
}
@@ -330,8 +343,8 @@ entry:
; CHECK: v{{[0-9]+}} = vand(q{{[0-3]+}},r{{[0-9]+}})
define void @test35(<16 x i32> %a, i32 %b) #0 {
entry:
- %0 = bitcast <16 x i32> %a to <512 x i1>
- %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %0, i32 %b)
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 %b)
store <16 x i32> %1, <16 x i32>* @h, align 64
ret void
}
@@ -340,8 +353,9 @@ entry:
; CHECK: q{{[0-3]+}} = vand(v{{[0-9]+}},r{{[0-9]+}})
define void @test36(<16 x i32> %a, i32 %b) #0 {
entry:
- %0 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 %b)
- store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 %b)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+ store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -476,8 +490,9 @@ entry:
; CHECK: q{{[0-3]}} = vsetq(r{{[0-9]+}})
define void @test51(i32 %a) #0 {
entry:
- %0 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a)
- store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
+ %0 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a)
+ %1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
+ store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@@ -546,23 +561,23 @@ declare <32 x i32> @llvm.hexagon.V6.vunpackob(<32 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vunpackoh(<32 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #0
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #0
-declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #0
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #0
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #0
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p(i64, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p.acc(i64, i64, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p.and(i64, i64, i32) #0
@@ -577,7 +592,7 @@ declare i32 @llvm.hexagon.S6.rol.i.r.or(i32, i32, i32) #0
declare i32 @llvm.hexagon.S6.rol.i.r.xacc(i32, i32, i32) #0
declare i32 @llvm.hexagon.V6.extractw(<16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #0
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #0
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlutvvb(<16 x i32>, <16 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlutvvb.oracc(<16 x i32>, <16 x i32>, <16 x i32>, i32) #0
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll b/llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll
index 588b0270902d..a3319b92164b 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics-v60-vcmp.ll
@@ -1,15 +1,16 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
-@d = external global <16 x i32>
+@d = external global <16 x i32>, align 64
; CHECK-LABEL: test1:
; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test1(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -17,10 +18,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test2(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -28,10 +30,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test3(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -39,10 +42,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test4(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -50,10 +54,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test5(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -61,10 +66,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test6(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -72,10 +78,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test7(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -83,10 +90,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test8(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -94,10 +102,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
define void @test9(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -105,10 +114,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test10(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -116,10 +126,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test11(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -127,10 +138,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test12(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -138,10 +150,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test13(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -149,10 +162,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test14(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -160,10 +174,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test15(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -171,10 +186,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test16(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -182,10 +198,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test17(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -193,10 +210,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
define void @test18(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -204,10 +222,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test19(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -215,10 +234,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test20(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -226,10 +246,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test21(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -237,10 +258,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test22(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -248,10 +270,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test23(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -259,10 +282,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test24(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -270,10 +294,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test25(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -281,10 +306,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test26(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@@ -292,39 +318,42 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
define void @test27(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
- %0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
- %2 = bitcast <512 x i1> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* @d, align 64
+ %v0 = load <16 x i32>, <16 x i32>* @d, align 64
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
+ store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
-declare <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #0
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll b/llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
index 3b853ebb444b..a9defbf11e26 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll
@@ -12,30 +12,32 @@
; CHECK-LABEL: V6_vmaskedstorentnq_128B
; CHECK: if (!q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
-declare void @llvm.hexagon.V6.vmaskedstoreq.128B(<1024 x i1>, i8*, <32 x i32>)
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
+
+declare void @llvm.hexagon.V6.vmaskedstoreq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstoreq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
- %1 = bitcast <32 x i32> %a to <1024 x i1>
- call void @llvm.hexagon.V6.vmaskedstoreq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vmaskedstoreq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}
-declare void @llvm.hexagon.V6.vmaskedstorenq.128B(<1024 x i1>, i8*, <32 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorenq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorenq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
- %1 = bitcast <32 x i32> %a to <1024 x i1>
- call void @llvm.hexagon.V6.vmaskedstorenq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vmaskedstorenq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}
-declare void @llvm.hexagon.V6.vmaskedstorentq.128B(<1024 x i1>, i8*, <32 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorentq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorentq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
- %1 = bitcast <32 x i32> %a to <1024 x i1>
- call void @llvm.hexagon.V6.vmaskedstorentq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vmaskedstorentq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}
-declare void @llvm.hexagon.V6.vmaskedstorentnq.128B(<1024 x i1>, i8*, <32 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorentnq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorentnq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
- %1 = bitcast <32 x i32> %a to <1024 x i1>
- call void @llvm.hexagon.V6.vmaskedstorentnq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vmaskedstorentnq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll b/llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
index 5ff672224529..2aacaeae44b3 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/byte-store.ll
@@ -12,30 +12,32 @@
; CHECK-LABEL: V6_vmaskedstorentnq
; CHECK: if (!q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
-declare void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1>, i8*, <16 x i32>)
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
+
+declare void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstoreq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
- %1 = bitcast <16 x i32> %a to <512 x i1>
- call void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1> %1, i8* %b, <16 x i32> %c)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
-declare void @llvm.hexagon.V6.vmaskedstorenq(<512 x i1>, i8*, <16 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorenq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorenq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
- %1 = bitcast <16 x i32> %a to <512 x i1>
- call void @llvm.hexagon.V6.vmaskedstorenq(<512 x i1> %1, i8* %b, <16 x i32> %c)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vmaskedstorenq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
-declare void @llvm.hexagon.V6.vmaskedstorentq(<512 x i1>, i8*, <16 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorentq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorentq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
- %1 = bitcast <16 x i32> %a to <512 x i1>
- call void @llvm.hexagon.V6.vmaskedstorentq(<512 x i1> %1, i8* %b, <16 x i32> %c)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vmaskedstorentq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
-declare void @llvm.hexagon.V6.vmaskedstorentnq(<512 x i1>, i8*, <16 x i32>)
+declare void @llvm.hexagon.V6.vmaskedstorentnq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorentnq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
- %1 = bitcast <16 x i32> %a to <512 x i1>
- call void @llvm.hexagon.V6.vmaskedstorentnq(<512 x i1> %1, i8* %b, <16 x i32> %c)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vmaskedstorentnq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
index 453f690f89f3..c54cd95daf78 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather-double.ll
@@ -19,6 +19,8 @@
; CHECK: if (q{{[0-3]+}}) vtmp.h = vgather(r1,m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h
; CHECK: vmem(r{{[0-9]+}}+#0) = vtmp.new
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
+
declare void @llvm.hexagon.V6.vgathermw.128B(i8*, i32, i32, <32 x i32>)
define void @V6_vgathermw_128B(i8* %a, i32 %b, i32 %c, <32 x i32> %d) {
call void @llvm.hexagon.V6.vgathermw.128B(i8* %a, i32 %b, i32 %c, <32 x i32> %d)
@@ -37,24 +39,24 @@ define void @V6_vgathermhw_128B(i8* %a, i32 %b, i32 %c, <64 x i32> %d) {
ret void
}
-declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <1024 x i1>, i32, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermwq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
- %1 = bitcast <32 x i32> %b to <1024 x i1>
- call void @llvm.hexagon.V6.vgathermwq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
+ call void @llvm.hexagon.V6.vgathermwq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}
-declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <1024 x i1>, i32, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermhq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
- %1 = bitcast <32 x i32> %b to <1024 x i1>
- call void @llvm.hexagon.V6.vgathermhq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
+ call void @llvm.hexagon.V6.vgathermhq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}
-declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <1024 x i1>, i32, i32, <64 x i32>)
+declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <128 x i1>, i32, i32, <64 x i32>)
define void @V6_vgathermhwq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <64 x i32> %e) {
- %1 = bitcast <32 x i32> %b to <1024 x i1>
- call void @llvm.hexagon.V6.vgathermhwq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <64 x i32> %e)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
+ call void @llvm.hexagon.V6.vgathermhwq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <64 x i32> %e)
ret void
}
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
index bc8591527c0d..c3a3b15ea1be 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65-gather.ll
@@ -19,6 +19,8 @@
; CHECK: if (q{{[0-3]+}}) vtmp.h = vgather(r1,m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h
; CHECK: vmem(r{{[0-9]+}}+#0) = vtmp.new
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
+
declare void @llvm.hexagon.V6.vgathermw(i8*, i32, i32, <16 x i32>)
define void @V6_vgathermw(i8* %a, i32 %b, i32 %c, <16 x i32> %d) {
call void @llvm.hexagon.V6.vgathermw(i8* %a, i32 %b, i32 %c, <16 x i32> %d)
@@ -37,23 +39,23 @@ define void @V6_vgathermhw(i8* %a, i32 %b, i32 %c, <32 x i32> %d) {
ret void
}
-declare void @llvm.hexagon.V6.vgathermwq(i8*, <512 x i1>, i32, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vgathermwq(i8*, <64 x i1>, i32, i32, <16 x i32>)
define void @V6_vgathermwq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <16 x i32> %e) {
- %1 = bitcast <16 x i32> %b to <512 x i1>
- call void @llvm.hexagon.V6.vgathermwq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ call void @llvm.hexagon.V6.vgathermwq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
ret void
}
-declare void @llvm.hexagon.V6.vgathermhq(i8*, <512 x i1>, i32, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vgathermhq(i8*, <64 x i1>, i32, i32, <16 x i32>)
define void @V6_vgathermhq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <16 x i32> %e) {
- %1 = bitcast <16 x i32> %b to <512 x i1>
- call void @llvm.hexagon.V6.vgathermhq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ call void @llvm.hexagon.V6.vgathermhq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
ret void
}
-declare void @llvm.hexagon.V6.vgathermhwq(i8*, <512 x i1>, i32, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vgathermhwq(i8*, <64 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermhwq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
- %1 = bitcast <16 x i32> %b to <512 x i1>
- call void @llvm.hexagon.V6.vgathermhwq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
+ call void @llvm.hexagon.V6.vgathermhwq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
index 40366fa3af1d..18ae2bac611f 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter-double.ll
@@ -19,6 +19,7 @@
; CHECK-LABEL: V6_vscattermhwq_128B
; CHECK: if (q{{[0-3]}}) vscatter(r{{[0-9]+}},m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h = v{{[0-9]+}}
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
declare void @llvm.hexagon.V6.vscattermw.128B(i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermw_128B(i32 %a, i32 %b, <32 x i32> %c, <32 x i32> %d) {
@@ -44,17 +45,17 @@ define void @V6_vscattermh_add_128B(i32 %a, i32 %b, <32 x i32> %c, <32 x i32> %d
ret void
}
-declare void @llvm.hexagon.V6.vscattermwq.128B(<1024 x i1>, i32, i32, <32 x i32>, <32 x i32>)
+declare void @llvm.hexagon.V6.vscattermwq.128B(<128 x i1>, i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermwq_128B(<32 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e) {
- %1 = bitcast <32 x i32> %a to <1024 x i1>
- call void @llvm.hexagon.V6.vscattermwq.128B(<1024 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vscattermwq.128B(<128 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
ret void
}
-declare void @llvm.hexagon.V6.vscattermhq.128B(<1024 x i1>, i32, i32, <32 x i32>, <32 x i32>)
+declare void @llvm.hexagon.V6.vscattermhq.128B(<128 x i1>, i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermhq_128B(<32 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e) {
- %1 = bitcast <32 x i32> %a to <1024 x i1>
- call void @llvm.hexagon.V6.vscattermhq.128B(<1024 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vscattermhq.128B(<128 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
ret void
}
@@ -70,9 +71,9 @@ define void @V6_vscattermhw_add_128B(i32 %a, i32 %b, <64 x i32> %c, <32 x i32> %
ret void
}
-declare void @llvm.hexagon.V6.vscattermhwq.128B(<1024 x i1>, i32, i32, <64 x i32>, <32 x i32>)
+declare void @llvm.hexagon.V6.vscattermhwq.128B(<128 x i1>, i32, i32, <64 x i32>, <32 x i32>)
define void @V6_vscattermhwq_128B(<32 x i32> %a, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e) {
- %1 = bitcast <32 x i32> %a to <1024 x i1>
- call void @llvm.hexagon.V6.vscattermhwq.128B(<1024 x i1> %1, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e)
+ %1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vscattermhwq.128B(<128 x i1> %1, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e)
ret void
}
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
index 405211c5dfac..1a61ee8b9c62 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65-scatter.ll
@@ -19,6 +19,7 @@
; CHECK-LABEL: V6_vscattermhwq
; CHECK: if (q{{[0-3]}}) vscatter(r{{[0-9]+}},m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h = v{{[0-9]+}}
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
declare void @llvm.hexagon.V6.vscattermw(i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermw(i32 %a, i32 %b, <16 x i32> %c, <16 x i32> %d) {
@@ -44,17 +45,17 @@ define void @V6_vscattermh_add(i32 %a, i32 %b, <16 x i32> %c, <16 x i32> %d) {
ret void
}
-declare void @llvm.hexagon.V6.vscattermwq(<512 x i1>, i32, i32, <16 x i32>, <16 x i32>)
+declare void @llvm.hexagon.V6.vscattermwq(<64 x i1>, i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermwq(<16 x i32> %a, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e) {
- %1 = bitcast <16 x i32> %a to <512 x i1>
- call void @llvm.hexagon.V6.vscattermwq(<512 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vscattermwq(<64 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
ret void
}
-declare void @llvm.hexagon.V6.vscattermhq(<512 x i1>, i32, i32, <16 x i32>, <16 x i32>)
+declare void @llvm.hexagon.V6.vscattermhq(<64 x i1>, i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermhq(<16 x i32> %a, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e) {
- %1 = bitcast <16 x i32> %a to <512 x i1>
- call void @llvm.hexagon.V6.vscattermhq(<512 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vscattermhq(<64 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
ret void
}
@@ -70,9 +71,9 @@ define void @V6_vscattermhw_add(i32 %a, i32 %b, <32 x i32> %c, <16 x i32> %d) {
ret void
}
-declare void @llvm.hexagon.V6.vscattermhwq(<512 x i1>, i32, i32, <32 x i32>, <16 x i32>)
+declare void @llvm.hexagon.V6.vscattermhwq(<64 x i1>, i32, i32, <32 x i32>, <16 x i32>)
define void @V6_vscattermhwq(<16 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e) {
- %1 = bitcast <16 x i32> %a to <512 x i1>
- call void @llvm.hexagon.V6.vscattermhwq(<512 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
+ call void @llvm.hexagon.V6.vscattermhwq(<64 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e)
ret void
}
diff --git a/llvm/test/CodeGen/Hexagon/intrinsics/v65.ll b/llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
index 8d503f11800f..85f3c8e7cb16 100644
--- a/llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
+++ b/llvm/test/CodeGen/Hexagon/intrinsics/v65.ll
@@ -136,21 +136,21 @@ define <16 x i32> @V6_vmpyuhe(<16 x i32> %a, i32 %b) {
}
; CHECK: = vmpye(v0.uh,r0.uh)
-;declare <16 x i32> @llvm.hexagon.V6.vprefixqb(<512 x i1>)
-;define <16 x i32> @V6_vprefixqb(<512 x i1> %a) {
-; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqb(<512 x i1> %a)
+;declare <16 x i32> @llvm.hexagon.V6.vprefixqb(<64 x i1>)
+;define <16 x i32> @V6_vprefixqb(<64 x i1> %a) {
+; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqb(<64 x i1> %a)
; ret <16 x i32> %b
;}
-;declare <16 x i32> @llvm.hexagon.V6.vprefixqh(<512 x i1>)
-;define <16 x i32> @V6_vprefixqh(<512 x i1> %a) {
-; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqh(<512 x i1> %a)
+;declare <16 x i32> @llvm.hexagon.V6.vprefixqh(<64 x i1>)
+;define <16 x i32> @V6_vprefixqh(<64 x i1> %a) {
+; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqh(<64 x i1> %a)
; ret <16 x i32> %b
;}
-;declare <16 x i32> @llvm.hexagon.V6.vprefixqw(<512 x i1>)
-;define <16 x i32> @V6_vprefixqw(<512 x i1> %a) {
-; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqw(<512 x i1> %a)
+;declare <16 x i32> @llvm.hexagon.V6.vprefixqw(<64 x i1>)
+;define <16 x i32> @V6_vprefixqw(<64 x i1> %a) {
+; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqw(<64 x i1> %a)
; ret <16 x i32> %b
;}
diff --git a/llvm/test/CodeGen/Hexagon/late_instr.ll b/llvm/test/CodeGen/Hexagon/late_instr.ll
index c21e0140ca06..7825ef96d2ff 100644
--- a/llvm/test/CodeGen/Hexagon/late_instr.ll
+++ b/llvm/test/CodeGen/Hexagon/late_instr.ll
@@ -28,10 +28,10 @@ b0:
%v13 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v12)
%v14 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v13)
%v15 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v14)
- %v16 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
+ %v16 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
%v17 = shl i32 1, %v8
%v18 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v17)
- %v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v15, <512 x i1> %v16, i32 %v18)
+ %v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v15, <64 x i1> %v16, i32 %v18)
%v20 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %a3)
%v21 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v20)
%v22 = icmp sgt i32 %v5, 0
@@ -48,8 +48,8 @@ b1: ; preds = %b0
%v30 = getelementptr inbounds i8, i8* %a0, i32 %v29
%v31 = bitcast i8* %v30 to <16 x i32>*
%v32 = load <16 x i32>, <16 x i32>* %v31, align 64, !tbaa !0
- %v33 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
- %v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v33, i32 16843009)
+ %v33 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
+ %v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v33, i32 16843009)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v34)
%v36 = add i32 %v0, %a5
%v37 = getelementptr inbounds i8, i8* %a0, i32 %v36
@@ -127,11 +127,11 @@ b4: ; preds = %b4, %b3
%v100 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v94, <16 x i32> %v91)
%v101 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v97, <16 x i32> %v99)
%v102 = tail call <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32> %v98, <16 x i32> %v100)
- %v103 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101, <16 x i32> %v96)
- %v104 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95, <16 x i32> %v102)
- %v105 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %v103, <512 x i1> %v104)
+ %v103 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101, <16 x i32> %v96)
+ %v104 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95, <16 x i32> %v102)
+ %v105 = tail call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %v103, <64 x i1> %v104)
%v106 = tail call i32 @llvm.hexagon.S6.rol.i.r(i32 %v83, i32 1)
- %v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v86, <512 x i1> %v105, i32 %v106)
+ %v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v86, <64 x i1> %v105, i32 %v106)
%v108 = add nsw i32 %v79, -64
%v109 = icmp sgt i32 %v79, 64
br i1 %v109, label %b4, label %b5
@@ -179,16 +179,16 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.hexagon.S2.vsplatrb(i32) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vd0() #1
@@ -212,10 +212,10 @@ declare <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.hexagon.S6.rol.i.r(i32, i32) #1
diff --git a/llvm/test/CodeGen/Hexagon/peephole-move-phi.ll b/llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
index e161f075530e..906c5bfe4033 100644
--- a/llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
+++ b/llvm/test/CodeGen/Hexagon/peephole-move-phi.ll
@@ -15,35 +15,35 @@ b0:
br i1 %v0, label %b1, label %b2
b1: ; preds = %b0
- %v1 = tail call <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1> undef) #2
+ %v1 = tail call <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1> undef) #2
br label %b2
b2: ; preds = %b1, %b0
- %v2 = phi <1024 x i1> [ %v1, %b1 ], [ undef, %b0 ]
+ %v2 = phi <128 x i1> [ %v1, %b1 ], [ undef, %b0 ]
br label %b3
b3: ; preds = %b3, %b2
- %v3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v2, <32 x i32> undef, <32 x i32> undef) #2
+ %v3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v2, <32 x i32> undef, <32 x i32> undef) #2
%v4 = tail call <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32> undef, <32 x i32> %v3) #2
%v5 = tail call <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32> %v4, <32 x i32> undef) #2
- %v6 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v5, <32 x i32> undef) #2
- %v7 = tail call <1024 x i1> @llvm.hexagon.V6.pred.or.128B(<1024 x i1> %v6, <1024 x i1> undef) #2
- %v8 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v7, <32 x i32> undef, <32 x i32> undef) #2
- tail call void asm sideeffect "if($0) vmem($1)=$2;", "q,r,v,~{memory}"(<32 x i32> undef, <32 x i32>* undef, <32 x i32> %v8) #2
+ %v6 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v5, <32 x i32> undef) #2
+ %v7 = tail call <128 x i1> @llvm.hexagon.V6.pred.or.128B(<128 x i1> %v6, <128 x i1> undef) #2
+ %v8 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v7, <32 x i32> undef, <32 x i32> undef) #2
+ tail call void asm sideeffect "if($0) vmem($1)=$2;", "q,r,v,~{memory}"(<128 x i1> undef, <32 x i32>* undef, <32 x i32> %v8) #2
br label %b3
}
; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.pred.or.128B(<1024 x i1>, <1024 x i1>) #1
+declare <128 x i1> @llvm.hexagon.V6.pred.or.128B(<128 x i1>, <128 x i1>) #1
; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1>) #1
+declare <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32>, <32 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
index e051303e02ff..3eb0c5e74725 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug-2.ll
@@ -25,36 +25,36 @@ b3: ; preds = %b3, %b2
%v7 = load <16 x i32>, <16 x i32>* %v6, align 64, !tbaa !0
%v8 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> undef, <16 x i32> %v7, i32 4)
%v9 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v8, <16 x i32> zeroinitializer)
- %v10 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v9, <16 x i32> undef)
- %v11 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v10, <16 x i32> undef, <16 x i32> undef)
- %v12 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> zeroinitializer, <16 x i32> %v11, <16 x i32> undef)
- %v13 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> undef, <16 x i32> %v12, <16 x i32> undef)
+ %v10 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v9, <16 x i32> undef)
+ %v11 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v10, <16 x i32> undef, <16 x i32> undef)
+ %v12 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> zeroinitializer, <16 x i32> %v11, <16 x i32> undef)
+ %v13 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> undef, <16 x i32> %v12, <16 x i32> undef)
%v14 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> undef, i32 1)
%v15 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v14, <16 x i32> zeroinitializer)
%v16 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> zeroinitializer, <16 x i32> zeroinitializer)
- %v17 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
- %v18 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v15, <16 x i32> undef)
- %v19 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
- %v20 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v16, <16 x i32> undef)
- %v21 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v19, <16 x i32> undef, <16 x i32> zeroinitializer)
- %v22 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v20, <16 x i32> undef, <16 x i32> zeroinitializer)
+ %v17 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
+ %v18 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v15, <16 x i32> undef)
+ %v19 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
+ %v20 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v16, <16 x i32> undef)
+ %v21 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v19, <16 x i32> undef, <16 x i32> zeroinitializer)
+ %v22 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v20, <16 x i32> undef, <16 x i32> zeroinitializer)
%v23 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v22, <16 x i32> %v21)
%v24 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> zeroinitializer, <32 x i32> %v23, i32 16843009)
- %v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v17, <16 x i32> %v13, <16 x i32> undef)
- %v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v18, <16 x i32> %v25, <16 x i32> undef)
- %v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v19, <16 x i32> %v26, <16 x i32> undef)
- %v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v20, <16 x i32> %v27, <16 x i32> undef)
+ %v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v17, <16 x i32> %v13, <16 x i32> undef)
+ %v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v18, <16 x i32> %v25, <16 x i32> undef)
+ %v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v19, <16 x i32> %v26, <16 x i32> undef)
+ %v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v20, <16 x i32> %v27, <16 x i32> undef)
%v29 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> undef, <16 x i32> zeroinitializer)
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> zeroinitializer, <16 x i32> zeroinitializer)
- %v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> undef, <16 x i32> undef)
- %v32 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v29, <16 x i32> undef)
- %v33 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> undef)
+ %v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> undef, <16 x i32> undef)
+ %v32 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v29, <16 x i32> undef)
+ %v33 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> undef)
%v34 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v24, <32 x i32> zeroinitializer, i32 16843009)
%v35 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v34, <32 x i32> undef, i32 16843009)
- %v36 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> zeroinitializer, <16 x i32> %v28, <16 x i32> undef)
- %v37 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v36, <16 x i32> undef)
- %v38 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v32, <16 x i32> %v37, <16 x i32> undef)
- %v39 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v33, <16 x i32> %v38, <16 x i32> undef)
+ %v36 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> zeroinitializer, <16 x i32> %v28, <16 x i32> undef)
+ %v37 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v36, <16 x i32> undef)
+ %v38 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v32, <16 x i32> %v37, <16 x i32> undef)
+ %v39 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v33, <16 x i32> %v38, <16 x i32> undef)
%v40 = add nsw i32 %v3, 3
%v41 = icmp eq i32 %v40, 5
br i1 %v41, label %b4, label %b3
@@ -85,13 +85,13 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
index 49e451d54ea1..91fb350daab5 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
@@ -28,13 +28,13 @@ declare i32 @printf(i8*, ...) #0
declare void @print_vecpred(i32, i8*) #0
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind
declare void @init_vectors() #0
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
@@ -43,7 +43,7 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare void @init_addresses() #0
; Function Attrs: nounwind
-declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind
define i32 @main() #0 {
@@ -63,13 +63,13 @@ entry:
%7 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%call1381 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str251, i32 0, i32 0)) #3
%8 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
- %9 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
+ %9 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
call void @print_vector(i32 64, i8* bitcast (<16 x i32>* @VectorResult to i8*))
%10 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
- %11 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
- %12 = bitcast <512 x i1> %11 to <16 x i32>
- %13 = bitcast <16 x i32> %12 to <512 x i1>
- %14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %13, <16 x i32> undef, <16 x i32> undef)
+ %11 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
+ %12 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %11, i32 -1)
+ %13 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %12, i32 -1)
+ %14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %13, <16 x i32> undef, <16 x i32> undef)
store <16 x i32> %14, <16 x i32>* @VectorResult, align 64
ret i32 0
}
diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
index 641af59a7c53..138b7321086b 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug-4.ll
@@ -114,12 +114,12 @@ b4: ; preds = %b3
%v91 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v90, i32 1)
%v92 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v91, i32 1)
%v93 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v92, i32 1)
- %v94 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> %v93)
- %v95 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> %v94, <16 x i32> undef)
+ %v94 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> undef, <16 x i32> undef, <16 x i32> %v93)
+ %v95 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> undef, <16 x i32> %v94, <16 x i32> undef)
%v96 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> undef, i32 1)
- %v97 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %v96, <16 x i32> %v95)
- %v98 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %v97, <16 x i32> undef, <16 x i32> undef)
- %v99 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> undef)
+ %v97 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %v96, <16 x i32> %v95)
+ %v98 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %v97, <16 x i32> undef, <16 x i32> undef)
+ %v99 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> undef, <16 x i32> undef, <16 x i32> undef)
%v100 = tail call <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32> %v99, <16 x i32> %v98)
%v101 = tail call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %v100, <16 x i32> undef)
%v102 = getelementptr inbounds <16 x i32>, <16 x i32>* %v2, i32 1
@@ -183,13 +183,13 @@ declare <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vadduhw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32>, i32) #1
diff --git a/llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll b/llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
index bc878e09ef94..8ffa4659a9dd 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
@@ -82,14 +82,14 @@ entry:
%asmresult58 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 29
%asmresult59 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 30
%asmresult60 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 31
- %2 = tail call { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } asm "nop", "=q,=q,=q,=q"() #1
- %asmresult61 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 0
- %asmresult62 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 1
- %asmresult63 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 2
- %asmresult64 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 3
- %3 = tail call <16 x i32> asm "nop", "=q,q,q,q,q"(<16 x i32> %asmresult61, <16 x i32> %asmresult62, <16 x i32> %asmresult63, <16 x i32> %asmresult64) #1
- tail call void asm sideeffect "nop", "q,q,q"(<16 x i32> %asmresult61, <16 x i32> %asmresult62, <16 x i32> %asmresult63) #2
- tail call void asm sideeffect "nop", "q,q"(<16 x i32> %asmresult64, <16 x i32> %3) #2
+ %2 = tail call { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } asm "nop", "=q,=q,=q,=q"() #1
+ %asmresult61 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 0
+ %asmresult62 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 1
+ %asmresult63 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 2
+ %asmresult64 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 3
+ %3 = tail call <64 x i1> asm "nop", "=q,q,q,q,q"(<64 x i1> %asmresult61, <64 x i1> %asmresult62, <64 x i1> %asmresult63, <64 x i1> %asmresult64) #1
+ tail call void asm sideeffect "nop", "q,q,q"(<64 x i1> %asmresult61, <64 x i1> %asmresult62, <64 x i1> %asmresult63) #2
+ tail call void asm sideeffect "nop", "q,q"(<64 x i1> %asmresult64, <64 x i1> %3) #2
tail call void asm sideeffect "nop", "v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v"(<16 x i32> %asmresult29, <16 x i32> %asmresult30, <16 x i32> %asmresult31, <16 x i32> %asmresult32, <16 x i32> %asmresult33, <16 x i32> %asmresult34, <16 x i32> %asmresult35, <16 x i32> %asmresult36, <16 x i32> %asmresult37, <16 x i32> %asmresult38, <16 x i32> %asmresult39, <16 x i32> %asmresult40, <16 x i32> %asmresult41, <16 x i32> %asmresult42, <16 x i32> %asmresult43, <16 x i32> %asmresult44, <16 x i32> %asmresult45, <16 x i32> %asmresult46, <16 x i32> %asmresult47, <16 x i32> %asmresult48, <16 x i32> %asmresult49, <16 x i32> %asmresult50, <16 x i32> %asmresult51, <16 x i32> %asmresult52, <16 x i32> %asmresult53, <16 x i32> %asmresult54, <16 x i32> %asmresult55, <16 x i32> %asmresult56, <16 x i32> %asmresult57, <16 x i32> %asmresult58, <16 x i32> %asmresult59, <16 x i32> %asmresult60) #2
tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14, i32 %asmresult15, i32 %asmresult16, i32 %asmresult17, i32 %asmresult18, i32 %asmresult19, i32 %asmresult20, i32 %asmresult21, i32 %asmresult22, i32 %asmresult23, i32 %asmresult24, i32 %asmresult25, i32 %asmresult26, i32 %asmresult27, i32 %asmresult28) #2
ret void
diff --git a/llvm/test/CodeGen/Hexagon/split-vecpred.ll b/llvm/test/CodeGen/Hexagon/split-vecpred.ll
index 615eb88926c9..c3c0e18b2b26 100644
--- a/llvm/test/CodeGen/Hexagon/split-vecpred.ll
+++ b/llvm/test/CodeGen/Hexagon/split-vecpred.ll
@@ -32,7 +32,7 @@ b7: ; preds = %b6
br label %b8
b8: ; preds = %b7
- %v0 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> undef, i32 -1)
+ %v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> undef, i32 -1)
br i1 undef, label %b9, label %b11
b9: ; preds = %b8
@@ -42,9 +42,9 @@ b10: ; preds = %b12
br label %b11
b11: ; preds = %b10, %b8
- %v1 = phi <512 x i1> [ %v0, %b8 ], [ undef, %b10 ]
- %v2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v1, <512 x i1> undef)
- %v3 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %v2, <16 x i32> undef, <16 x i32> undef)
+ %v1 = phi <64 x i1> [ %v0, %b8 ], [ undef, %b10 ]
+ %v2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v1, <64 x i1> undef)
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %v2, <16 x i32> undef, <16 x i32> undef)
%v4 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %v3, i32 undef)
%v5 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v4, <16 x i32> undef, i32 undef)
%v6 = tail call <16 x i32> @llvm.hexagon.V6.vand(<16 x i32> %v5, <16 x i32> undef)
@@ -53,9 +53,9 @@ b11: ; preds = %b10, %b8
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vshufoeb(<16 x i32> undef, <16 x i32> %v8)
%v10 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v9)
%v11 = tail call <16 x i32> @llvm.hexagon.V6.vor(<16 x i32> %v10, <16 x i32> undef)
- %v12 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v11, i32 -1)
- %v13 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v12, i32 undef)
- tail call void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1> undef, i8* undef, <16 x i32> %v13)
+ %v12 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v11, i32 -1)
+ %v13 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v12, i32 undef)
+ tail call void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1> undef, i8* undef, <16 x i32> %v13)
unreachable
b12: ; preds = %b12, %b9
@@ -69,22 +69,22 @@ b13: ; preds = %b5
}
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: argmemonly nounwind
-declare void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1>, i8*, <16 x i32>) #2
+declare void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1>, i8*, <16 x i32>) #2
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vor(<16 x i32>, <16 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll b/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
index 14b04a1cfe64..b6af6d5a50d7 100644
--- a/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-prolog-phi.ll
@@ -25,9 +25,9 @@ b3: ; preds = %b3, %b2
%v2 = phi i32 [ 0, %b2 ], [ %v8, %b3 ]
%v3 = phi <32 x i32> [ zeroinitializer, %b2 ], [ %v0, %b3 ]
%v4 = phi <32 x i32> [ %v1, %b2 ], [ %v7, %b3 ]
- %v5 = tail call <1024 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %v3, <32 x i32> undef)
- %v6 = tail call <1024 x i1> @llvm.hexagon.V6.veqh.and.128B(<1024 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
- %v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<1024 x i1> %v6, <32 x i32> %v4, <32 x i32> undef)
+ %v5 = tail call <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %v3, <32 x i32> undef)
+ %v6 = tail call <128 x i1> @llvm.hexagon.V6.veqh.and.128B(<128 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
+ %v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1> %v6, <32 x i32> %v4, <32 x i32> undef)
%v8 = add nsw i32 %v2, 1
%v9 = icmp slt i32 %v8, %a2
br i1 %v9, label %b3, label %b4
@@ -40,13 +40,13 @@ b5: ; preds = %b4, %b0
}
; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.veqh.and.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.veqh.and.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/swp-sigma.ll b/llvm/test/CodeGen/Hexagon/swp-sigma.ll
index 165174282099..1e376323a32f 100644
--- a/llvm/test/CodeGen/Hexagon/swp-sigma.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-sigma.ll
@@ -17,9 +17,9 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #0
declare <16 x i32> @llvm.hexagon.V6.vd0() #0
declare <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #0
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #0
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #0
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #0
@@ -106,48 +106,48 @@ b6: ; preds = %b6, %b5
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32> %v8, <16 x i32> %v47) #2
%v54 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v45, <16 x i32> %v47) #2
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v49, <16 x i32> %v47) #2
- %v56 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v54, <16 x i32> %v7) #2
- %v57 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v55, <16 x i32> %v7) #2
- %v58 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v56, <16 x i32> %v9, <16 x i32> %v10) #2
- %v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v57, <16 x i32> %v58, <16 x i32> %v9) #2
- %v60 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v56, <16 x i32> %v8, <16 x i32> %v45) #2
- %v61 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v57, <16 x i32> %v8, <16 x i32> %v49) #2
+ %v56 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v54, <16 x i32> %v7) #2
+ %v57 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v55, <16 x i32> %v7) #2
+ %v58 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v56, <16 x i32> %v9, <16 x i32> %v10) #2
+ %v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v57, <16 x i32> %v58, <16 x i32> %v9) #2
+ %v60 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v56, <16 x i32> %v8, <16 x i32> %v45) #2
+ %v61 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v57, <16 x i32> %v8, <16 x i32> %v49) #2
%v62 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v61, <16 x i32> %v60) #2
%v63 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v53, <32 x i32> %v62, i32 -1) #2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v45, <16 x i32> %v44, i32 1) #2
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v49, <16 x i32> %v48, i32 1) #2
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v64, <16 x i32> %v47) #2
%v67 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v65, <16 x i32> %v47) #2
- %v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v7) #2
- %v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v67, <16 x i32> %v7) #2
- %v70 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v59, <16 x i32> %v9) #2
- %v71 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v70, <16 x i32> %v9) #2
- %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v8, <16 x i32> %v64) #2
- %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v8, <16 x i32> %v65) #2
+ %v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v7) #2
+ %v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v67, <16 x i32> %v7) #2
+ %v70 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v59, <16 x i32> %v9) #2
+ %v71 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v70, <16 x i32> %v9) #2
+ %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v8, <16 x i32> %v64) #2
+ %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v8, <16 x i32> %v65) #2
%v74 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v73, <16 x i32> %v72) #2
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v63, <32 x i32> %v74, i32 -1) #2
%v76 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v50, <16 x i32> %v45, i32 1) #2
%v77 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v52, <16 x i32> %v49, i32 1) #2
%v78 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v76, <16 x i32> %v47) #2
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v77, <16 x i32> %v47) #2
- %v80 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v78, <16 x i32> %v7) #2
- %v81 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v79, <16 x i32> %v7) #2
- %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v80, <16 x i32> %v71, <16 x i32> %v9) #2
- %v83 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v81, <16 x i32> %v82, <16 x i32> %v9) #2
- %v84 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v80, <16 x i32> %v8, <16 x i32> %v76) #2
- %v85 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v81, <16 x i32> %v8, <16 x i32> %v77) #2
+ %v80 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v78, <16 x i32> %v7) #2
+ %v81 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v79, <16 x i32> %v7) #2
+ %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v80, <16 x i32> %v71, <16 x i32> %v9) #2
+ %v83 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v81, <16 x i32> %v82, <16 x i32> %v9) #2
+ %v84 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v80, <16 x i32> %v8, <16 x i32> %v76) #2
+ %v85 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v81, <16 x i32> %v8, <16 x i32> %v77) #2
%v86 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v85, <16 x i32> %v84) #2
%v87 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v75, <32 x i32> %v86, i32 -1) #2
%v88 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v47, <16 x i32> %v46, i32 1) #2
%v89 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v51, <16 x i32> %v47, i32 1) #2
%v90 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v88, <16 x i32> %v47) #2
%v91 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v89, <16 x i32> %v47) #2
- %v92 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v90, <16 x i32> %v7) #2
- %v93 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v91, <16 x i32> %v7) #2
- %v94 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v92, <16 x i32> %v83, <16 x i32> %v9) #2
- %v95 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v93, <16 x i32> %v94, <16 x i32> %v9) #2
- %v96 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v92, <16 x i32> %v8, <16 x i32> %v88) #2
- %v97 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v93, <16 x i32> %v8, <16 x i32> %v89) #2
+ %v92 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v90, <16 x i32> %v7) #2
+ %v93 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v91, <16 x i32> %v7) #2
+ %v94 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v92, <16 x i32> %v83, <16 x i32> %v9) #2
+ %v95 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v93, <16 x i32> %v94, <16 x i32> %v9) #2
+ %v96 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v92, <16 x i32> %v8, <16 x i32> %v88) #2
+ %v97 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v93, <16 x i32> %v8, <16 x i32> %v89) #2
%v98 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v97, <16 x i32> %v96) #2
%v99 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v87, <32 x i32> %v98, i32 -1) #2
%v100 = tail call <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32> %v95, <16 x i32> %v4, i32 0) #2
diff --git a/llvm/test/CodeGen/Hexagon/v6-inlasm4.ll b/llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
index 4605b1c9b0a0..fada5c11732d 100644
--- a/llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
+++ b/llvm/test/CodeGen/Hexagon/v6-inlasm4.ll
@@ -12,8 +12,9 @@ b0:
store i32 %a0, i32* %v0, align 4
store <16 x i32> %a1, <16 x i32>* %v1, align 64
%v3 = load i32, i32* %v0, align 4
- %v4 = load <16 x i32>, <16 x i32>* %v2, align 64
- call void asm sideeffect " $1 = vsetq($0);\0A", "r,q"(i32 %v3, <16 x i32> %v4) #1, !srcloc !0
+ %v4 = tail call <64 x i1> asm sideeffect " $0 = vsetq($1);\0A", "=q,r"(i32 %v3) #1, !srcloc !0
+ %v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
+ store <16 x i32> %v5, <16 x i32>* %v2, align 64
ret void
}
@@ -23,7 +24,9 @@ b0:
ret i32 0
}
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind readnone }
!0 = !{i32 222}
diff --git a/llvm/test/CodeGen/Hexagon/v6-spill1.ll b/llvm/test/CodeGen/Hexagon/v6-spill1.ll
index 788c27d42382..c2f37d44b288 100644
--- a/llvm/test/CodeGen/Hexagon/v6-spill1.ll
+++ b/llvm/test/CodeGen/Hexagon/v6-spill1.ll
@@ -44,10 +44,10 @@ b3: ; preds = %b3, %b2
%v28 = bitcast i8* %v27 to <16 x i32>*
%v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
- %v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
- %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
+ %v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
+ %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
- %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
+ %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
@@ -56,22 +56,22 @@ b3: ; preds = %b3, %b2
%v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
%v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
%v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
- %v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
- %v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
- %v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
- %v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
- %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
- %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
- %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
- %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
+ %v43 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
+ %v44 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
+ %v45 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
+ %v46 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
+ %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
+ %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
+ %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
+ %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
%v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
%v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
- %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
- %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
- %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
- %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
+ %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
+ %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
+ %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
+ %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
%v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
@@ -80,22 +80,22 @@ b3: ; preds = %b3, %b2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
- %v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
- %v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
- %v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
- %v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
- %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
- %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
- %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
- %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
+ %v67 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
+ %v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
+ %v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
+ %v70 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
+ %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
+ %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
+ %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
+ %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
%v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
%v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
%v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
- %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
- %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
- %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
- %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
+ %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
+ %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
+ %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
+ %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v83 = add nsw i32 %v15, 1
%v84 = icmp eq i32 %v83, 5
br i1 %v84, label %b4, label %b3
@@ -147,16 +147,16 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
diff --git a/llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll b/llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
index be53694c40fa..a6eb739f8471 100644
--- a/llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
+++ b/llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll
@@ -28,7 +28,7 @@ b3: ; preds = %b2
b4: ; preds = %b4, %b3
%v3 = phi <32 x i32> [ %v5, %b4 ], [ undef, %b3 ]
- %v4 = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> %v3) #2
+ %v4 = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> %v3) #2
%v5 = tail call <32 x i32> @llvm.hexagon.V6.vavguh.128B(<32 x i32> %v3, <32 x i32> %v2) #2
br label %b4
@@ -43,7 +43,7 @@ declare void @f1(i8* nocapture readonly, i8* nocapture readonly, i8* nocapture,
declare <32 x i32> @llvm.hexagon.V6.vd0.128B() #1
; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vavguh.128B(<32 x i32>, <32 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll b/llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
index ed0fb0592541..c5cba8cf6155 100644
--- a/llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
+++ b/llvm/test/CodeGen/Hexagon/v6-vecpred-copy.ll
@@ -32,76 +32,76 @@ b0:
%v2 = call <16 x i32> @llvm.hexagon.V6.vd0()
store <16 x i32> %v2, <16 x i32>* @g2, align 64
%v3 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v4 = bitcast <16 x i32> %v3 to <512 x i1>
+ %v4 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v3, i32 -1)
%v5 = load <16 x i32>, <16 x i32>* @g2, align 64
%v6 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v7 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %v4, <16 x i32> %v5, <16 x i32> %v6)
+ %v7 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %v4, <16 x i32> %v5, <16 x i32> %v6)
store <16 x i32> %v7, <16 x i32>* @g2, align 64
%v8 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v9 = bitcast <16 x i32> %v8 to <512 x i1>
+ %v9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v8, i32 -1)
%v10 = load <16 x i32>, <16 x i32>* @g2, align 64
%v11 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v12 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %v9, <16 x i32> %v10, <16 x i32> %v11)
+ %v12 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %v9, <16 x i32> %v10, <16 x i32> %v11)
store <16 x i32> %v12, <16 x i32>* @g2, align 64
%v13 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v14 = bitcast <16 x i32> %v13 to <512 x i1>
+ %v14 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v13, i32 -1)
%v15 = load <16 x i32>, <16 x i32>* @g2, align 64
%v16 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v17 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %v14, <16 x i32> %v15, <16 x i32> %v16)
+ %v17 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %v14, <16 x i32> %v15, <16 x i32> %v16)
store <16 x i32> %v17, <16 x i32>* @g2, align 64
%v18 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v19 = bitcast <16 x i32> %v18 to <512 x i1>
+ %v19 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v18, i32 -1)
%v20 = load <16 x i32>, <16 x i32>* @g2, align 64
%v21 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v22 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %v19, <16 x i32> %v20, <16 x i32> %v21)
+ %v22 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %v19, <16 x i32> %v20, <16 x i32> %v21)
store <16 x i32> %v22, <16 x i32>* @g2, align 64
%v23 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v24 = bitcast <16 x i32> %v23 to <512 x i1>
+ %v24 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v23, i32 -1)
%v25 = load <16 x i32>, <16 x i32>* @g2, align 64
%v26 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v27 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %v24, <16 x i32> %v25, <16 x i32> %v26)
+ %v27 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %v24, <16 x i32> %v25, <16 x i32> %v26)
store <16 x i32> %v27, <16 x i32>* @g2, align 64
%v28 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v29 = bitcast <16 x i32> %v28 to <512 x i1>
+ %v29 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v28, i32 -1)
%v30 = load <16 x i32>, <16 x i32>* @g2, align 64
%v31 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v32 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %v29, <16 x i32> %v30, <16 x i32> %v31)
+ %v32 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %v29, <16 x i32> %v30, <16 x i32> %v31)
store <16 x i32> %v32, <16 x i32>* @g2, align 64
%v33 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v34 = bitcast <16 x i32> %v33 to <512 x i1>
+ %v34 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v33, i32 -1)
%v35 = load <16 x i32>, <16 x i32>* @g2, align 64
%v36 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v37 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v34, <16 x i32> %v35, <16 x i32> %v36)
+ %v37 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v34, <16 x i32> %v35, <16 x i32> %v36)
store <16 x i32> %v37, <16 x i32>* @g2, align 64
%v38 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v39 = bitcast <16 x i32> %v38 to <512 x i1>
+ %v39 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v38, i32 -1)
%v40 = load <16 x i32>, <16 x i32>* @g2, align 64
%v41 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v42 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %v39, <16 x i32> %v40, <16 x i32> %v41)
+ %v42 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %v39, <16 x i32> %v40, <16 x i32> %v41)
store <16 x i32> %v42, <16 x i32>* @g2, align 64
%v43 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v44 = bitcast <16 x i32> %v43 to <512 x i1>
+ %v44 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v43, i32 -1)
%v45 = load <16 x i32>, <16 x i32>* @g2, align 64
%v46 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v47 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %v44, <16 x i32> %v45, <16 x i32> %v46)
+ %v47 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %v44, <16 x i32> %v45, <16 x i32> %v46)
store <16 x i32> %v47, <16 x i32>* @g2, align 64
%v48 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v49 = bitcast <16 x i32> %v48 to <512 x i1>
+ %v49 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v48, i32 -1)
%v50 = load <16 x i32>, <16 x i32>* @g2, align 64
%v51 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v52 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %v49, <16 x i32> %v50, <16 x i32> %v51)
+ %v52 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %v49, <16 x i32> %v50, <16 x i32> %v51)
store <16 x i32> %v52, <16 x i32>* @g2, align 64
%v53 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v54 = bitcast <16 x i32> %v53 to <512 x i1>
+ %v54 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v53, i32 -1)
%v55 = load <16 x i32>, <16 x i32>* @g2, align 64
%v56 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v57 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %v54, <16 x i32> %v55, <16 x i32> %v56)
+ %v57 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %v54, <16 x i32> %v55, <16 x i32> %v56)
store <16 x i32> %v57, <16 x i32>* @g2, align 64
%v58 = load <16 x i32>, <16 x i32>* @g3, align 64
- %v59 = bitcast <16 x i32> %v58 to <512 x i1>
+ %v59 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v58, i32 -1)
%v60 = load <16 x i32>, <16 x i32>* @g2, align 64
%v61 = load <16 x i32>, <16 x i32>* @g1, align 64
- %v62 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %v59, <16 x i32> %v60, <16 x i32> %v61)
+ %v62 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %v59, <16 x i32> %v60, <16 x i32> %v61)
store <16 x i32> %v62, <16 x i32>* @g2, align 64
ret i32 0
}
@@ -110,40 +110,43 @@ b0:
declare <16 x i32> @llvm.hexagon.V6.vd0() #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll b/llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
index e80a9212e06a..f03a0cc438f0 100644
--- a/llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
+++ b/llvm/test/CodeGen/Hexagon/v60-vecpred-spill.ll
@@ -48,10 +48,10 @@ b3: ; preds = %b3, %b2
%v28 = bitcast i8* %v27 to <16 x i32>*
%v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
- %v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
- %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
+ %v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
+ %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
- %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
+ %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
@@ -60,22 +60,22 @@ b3: ; preds = %b3, %b2
%v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
%v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
%v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
- %v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
- %v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
- %v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
- %v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
- %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
- %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
- %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
- %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
+ %v43 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
+ %v44 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
+ %v45 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
+ %v46 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
+ %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
+ %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
+ %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
+ %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
%v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
%v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
- %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
- %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
- %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
- %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
+ %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
+ %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
+ %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
+ %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
%v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
@@ -84,22 +84,22 @@ b3: ; preds = %b3, %b2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
- %v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
- %v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
- %v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
- %v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
- %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
- %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
- %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
- %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
+ %v67 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
+ %v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
+ %v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
+ %v70 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
+ %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
+ %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
+ %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
+ %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
%v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
%v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
%v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
- %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
- %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
- %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
- %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
+ %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
+ %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
+ %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
+ %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v83 = add nsw i32 %v15, 1
%v84 = icmp eq i32 %v83, 5
br i1 %v84, label %b4, label %b3
@@ -151,16 +151,16 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
diff --git a/llvm/test/CodeGen/Hexagon/v60-vsel1.ll b/llvm/test/CodeGen/Hexagon/v60-vsel1.ll
index 71d112cc7357..5da450b80459 100644
--- a/llvm/test/CodeGen/Hexagon/v60-vsel1.ll
+++ b/llvm/test/CodeGen/Hexagon/v60-vsel1.ll
@@ -14,8 +14,8 @@ entry:
%add = add i32 %sub, %rem
%2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
- %4 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %add)
- %5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %3, <512 x i1> %4, i32 12)
+ %4 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %add)
+ %5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %3, <64 x i1> %4, i32 12)
%and4 = and i32 %add, 511
%cmp = icmp eq i32 %and4, 0
%sMaskR.0 = select i1 %cmp, <16 x i32> %2, <16 x i32> %5
@@ -23,8 +23,8 @@ entry:
br i1 %cmp547, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry
- %6 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %boundary)
- %7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %6, i32 16843009)
+ %6 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %boundary)
+ %7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %6, i32 16843009)
%8 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %7)
%9 = add i32 %rem, %xsize
%10 = add i32 %9, -1
@@ -59,9 +59,9 @@ for.end: ; preds = %for.cond.for.end_cr
}
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/v60-vsel2.ll b/llvm/test/CodeGen/Hexagon/v60-vsel2.ll
index 7dc06bb88e9c..8db3dd4ded0f 100644
--- a/llvm/test/CodeGen/Hexagon/v60-vsel2.ll
+++ b/llvm/test/CodeGen/Hexagon/v60-vsel2.ll
@@ -14,8 +14,8 @@ b0:
%v4 = add i32 %v2, %v3
%v5 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
%v6 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
- %v7 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v4)
- %v8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v6, <512 x i1> %v7, i32 12)
+ %v7 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v4)
+ %v8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v6, <64 x i1> %v7, i32 12)
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v8, <16 x i32> %v8)
%v10 = and i32 %v4, 511
%v11 = icmp eq i32 %v10, 0
@@ -31,8 +31,8 @@ b2: ; preds = %b1, %b0
br i1 %v14, label %b3, label %b6
b3: ; preds = %b2
- %v15 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
- %v16 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v15, i32 16843009)
+ %v15 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
+ %v16 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v15, i32 16843009)
%v17 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v16)
%v18 = add i32 %v3, %a1
%v19 = add i32 %v18, -1
@@ -71,16 +71,16 @@ b6: ; preds = %b5, %b2
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
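(Aside, not part of the patch: the two vsel tests above reduce to the following minimal 64-byte-mode sketch of the updated predicate typing. The function name @example_q64 and the constant operands are illustrative only; the intrinsic signatures are exactly the ones declared in the tests.)
; Function Attrs: nounwind readnone
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
define <16 x i32> @example_q64(<16 x i32> %acc, i32 %n) #0 {
  ; Build a <64 x i1> vector predicate from the scalar %n.
  %q = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %n)
  ; Convert the predicate back to vector-register form and accumulate it into
  ; %acc; the i32 operand is the byte pattern (the tests use 12 and 16843009).
  %v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %acc, <64 x i1> %q, i32 12)
  ret <16 x i32> %v
}
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }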
diff --git a/llvm/test/CodeGen/Hexagon/v60Intrins.ll b/llvm/test/CodeGen/Hexagon/v60Intrins.ll
index 45c122c1cb83..61087f573e6a 100644
--- a/llvm/test/CodeGen/Hexagon/v60Intrins.ll
+++ b/llvm/test/CodeGen/Hexagon/v60Intrins.ll
@@ -372,291 +372,291 @@ entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%0 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %1 = bitcast <16 x i32> %0 to <512 x i1>
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -1)
%2 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
- %3 = bitcast <16 x i32> %2 to <512 x i1>
- %4 = call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %1, <512 x i1> %3)
- %5 = bitcast <512 x i1> %4 to <16 x i32>
+ %3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %2, i32 -1)
+ %4 = call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %1, <64 x i1> %3)
+ %5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %4, i32 -1)
store volatile <16 x i32> %5, <16 x i32>* @Q6VecPredResult, align 64
%6 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %7 = bitcast <16 x i32> %6 to <512 x i1>
+ %7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %6, i32 -1)
%8 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
- %9 = bitcast <16 x i32> %8 to <512 x i1>
- %10 = call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %7, <512 x i1> %9)
- %11 = bitcast <512 x i1> %10 to <16 x i32>
+ %9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 -1)
+ %10 = call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %7, <64 x i1> %9)
+ %11 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %10, i32 -1)
store volatile <16 x i32> %11, <16 x i32>* @Q6VecPredResult, align 64
%12 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %13 = bitcast <16 x i32> %12 to <512 x i1>
- %14 = call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %13)
- %15 = bitcast <512 x i1> %14 to <16 x i32>
+ %13 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %12, i32 -1)
+ %14 = call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %13)
+ %15 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %14, i32 -1)
store volatile <16 x i32> %15, <16 x i32>* @Q6VecPredResult, align 64
%16 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %17 = bitcast <16 x i32> %16 to <512 x i1>
+ %17 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %16, i32 -1)
%18 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
- %19 = bitcast <16 x i32> %18 to <512 x i1>
- %20 = call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %17, <512 x i1> %19)
- %21 = bitcast <512 x i1> %20 to <16 x i32>
+ %19 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %18, i32 -1)
+ %20 = call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %17, <64 x i1> %19)
+ %21 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %20, i32 -1)
store volatile <16 x i32> %21, <16 x i32>* @Q6VecPredResult, align 64
%22 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %23 = bitcast <16 x i32> %22 to <512 x i1>
+ %23 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %22, i32 -1)
%24 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
- %25 = bitcast <16 x i32> %24 to <512 x i1>
- %26 = call <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1> %23, <512 x i1> %25)
- %27 = bitcast <512 x i1> %26 to <16 x i32>
+ %25 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %24, i32 -1)
+ %26 = call <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1> %23, <64 x i1> %25)
+ %27 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %26, i32 -1)
store volatile <16 x i32> %27, <16 x i32>* @Q6VecPredResult, align 64
%28 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
- %29 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %28, i32 -1)
- %30 = bitcast <512 x i1> %29 to <16 x i32>
+ %29 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %28, i32 -1)
+ %30 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %29, i32 -1)
store volatile <16 x i32> %30, <16 x i32>* @Q6VecPredResult, align 64
%31 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %32 = bitcast <16 x i32> %31 to <512 x i1>
+ %32 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %31, i32 -1)
%33 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
- %34 = call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %32, <16 x i32> %33, i32 -1)
- %35 = bitcast <512 x i1> %34 to <16 x i32>
+ %34 = call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %32, <16 x i32> %33, i32 -1)
+ %35 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %34, i32 -1)
store volatile <16 x i32> %35, <16 x i32>* @Q6VecPredResult, align 64
%36 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%37 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %38 = call <512 x i1> @llvm.hexagon.V6.veqb(<16 x i32> %36, <16 x i32> %37)
- %39 = bitcast <512 x i1> %38 to <16 x i32>
+ %38 = call <64 x i1> @llvm.hexagon.V6.veqb(<16 x i32> %36, <16 x i32> %37)
+ %39 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %38, i32 -1)
store volatile <16 x i32> %39, <16 x i32>* @Q6VecPredResult, align 64
%40 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%41 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %42 = call <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %40, <16 x i32> %41)
- %43 = bitcast <512 x i1> %42 to <16 x i32>
+ %42 = call <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %40, <16 x i32> %41)
+ %43 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %42, i32 -1)
store volatile <16 x i32> %43, <16 x i32>* @Q6VecPredResult, align 64
%44 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%45 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %46 = call <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %44, <16 x i32> %45)
- %47 = bitcast <512 x i1> %46 to <16 x i32>
+ %46 = call <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %44, <16 x i32> %45)
+ %47 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %46, i32 -1)
store volatile <16 x i32> %47, <16 x i32>* @Q6VecPredResult, align 64
%48 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %49 = bitcast <16 x i32> %48 to <512 x i1>
+ %49 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %48, i32 -1)
%50 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%51 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %52 = call <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1> %49, <16 x i32> %50, <16 x i32> %51)
- %53 = bitcast <512 x i1> %52 to <16 x i32>
+ %52 = call <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1> %49, <16 x i32> %50, <16 x i32> %51)
+ %53 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %52, i32 -1)
store volatile <16 x i32> %53, <16 x i32>* @Q6VecPredResult, align 64
%54 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %55 = bitcast <16 x i32> %54 to <512 x i1>
+ %55 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %54, i32 -1)
%56 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%57 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %58 = call <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1> %55, <16 x i32> %56, <16 x i32> %57)
- %59 = bitcast <512 x i1> %58 to <16 x i32>
+ %58 = call <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1> %55, <16 x i32> %56, <16 x i32> %57)
+ %59 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %58, i32 -1)
store volatile <16 x i32> %59, <16 x i32>* @Q6VecPredResult, align 64
%60 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %61 = bitcast <16 x i32> %60 to <512 x i1>
+ %61 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %60, i32 -1)
%62 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%63 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %64 = call <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1> %61, <16 x i32> %62, <16 x i32> %63)
- %65 = bitcast <512 x i1> %64 to <16 x i32>
+ %64 = call <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1> %61, <16 x i32> %62, <16 x i32> %63)
+ %65 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %64, i32 -1)
store volatile <16 x i32> %65, <16 x i32>* @Q6VecPredResult, align 64
%66 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %67 = bitcast <16 x i32> %66 to <512 x i1>
+ %67 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %66, i32 -1)
%68 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%69 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %70 = call <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1> %67, <16 x i32> %68, <16 x i32> %69)
- %71 = bitcast <512 x i1> %70 to <16 x i32>
+ %70 = call <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1> %67, <16 x i32> %68, <16 x i32> %69)
+ %71 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %70, i32 -1)
store volatile <16 x i32> %71, <16 x i32>* @Q6VecPredResult, align 64
%72 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %73 = bitcast <16 x i32> %72 to <512 x i1>
+ %73 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %72, i32 -1)
%74 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%75 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %76 = call <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1> %73, <16 x i32> %74, <16 x i32> %75)
- %77 = bitcast <512 x i1> %76 to <16 x i32>
+ %76 = call <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1> %73, <16 x i32> %74, <16 x i32> %75)
+ %77 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %76, i32 -1)
store volatile <16 x i32> %77, <16 x i32>* @Q6VecPredResult, align 64
%78 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %79 = bitcast <16 x i32> %78 to <512 x i1>
+ %79 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %78, i32 -1)
%80 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%81 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %82 = call <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1> %79, <16 x i32> %80, <16 x i32> %81)
- %83 = bitcast <512 x i1> %82 to <16 x i32>
+ %82 = call <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1> %79, <16 x i32> %80, <16 x i32> %81)
+ %83 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %82, i32 -1)
store volatile <16 x i32> %83, <16 x i32>* @Q6VecPredResult, align 64
%84 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %85 = bitcast <16 x i32> %84 to <512 x i1>
+ %85 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %84, i32 -1)
%86 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%87 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %88 = call <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1> %85, <16 x i32> %86, <16 x i32> %87)
- %89 = bitcast <512 x i1> %88 to <16 x i32>
+ %88 = call <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1> %85, <16 x i32> %86, <16 x i32> %87)
+ %89 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %88, i32 -1)
store volatile <16 x i32> %89, <16 x i32>* @Q6VecPredResult, align 64
%90 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %91 = bitcast <16 x i32> %90 to <512 x i1>
+ %91 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %90, i32 -1)
%92 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%93 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %94 = call <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1> %91, <16 x i32> %92, <16 x i32> %93)
- %95 = bitcast <512 x i1> %94 to <16 x i32>
+ %94 = call <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1> %91, <16 x i32> %92, <16 x i32> %93)
+ %95 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %94, i32 -1)
store volatile <16 x i32> %95, <16 x i32>* @Q6VecPredResult, align 64
%96 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %97 = bitcast <16 x i32> %96 to <512 x i1>
+ %97 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %96, i32 -1)
%98 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%99 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %100 = call <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1> %97, <16 x i32> %98, <16 x i32> %99)
- %101 = bitcast <512 x i1> %100 to <16 x i32>
+ %100 = call <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1> %97, <16 x i32> %98, <16 x i32> %99)
+ %101 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %100, i32 -1)
store volatile <16 x i32> %101, <16 x i32>* @Q6VecPredResult, align 64
%102 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%103 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %104 = call <512 x i1> @llvm.hexagon.V6.vgtb(<16 x i32> %102, <16 x i32> %103)
- %105 = bitcast <512 x i1> %104 to <16 x i32>
+ %104 = call <64 x i1> @llvm.hexagon.V6.vgtb(<16 x i32> %102, <16 x i32> %103)
+ %105 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %104, i32 -1)
store volatile <16 x i32> %105, <16 x i32>* @Q6VecPredResult, align 64
%106 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%107 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %108 = call <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %106, <16 x i32> %107)
- %109 = bitcast <512 x i1> %108 to <16 x i32>
+ %108 = call <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %106, <16 x i32> %107)
+ %109 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %108, i32 -1)
store volatile <16 x i32> %109, <16 x i32>* @Q6VecPredResult, align 64
%110 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%111 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %112 = call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %110, <16 x i32> %111)
- %113 = bitcast <512 x i1> %112 to <16 x i32>
+ %112 = call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %110, <16 x i32> %111)
+ %113 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %112, i32 -1)
store volatile <16 x i32> %113, <16 x i32>* @Q6VecPredResult, align 64
%114 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%115 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %116 = call <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %114, <16 x i32> %115)
- %117 = bitcast <512 x i1> %116 to <16 x i32>
+ %116 = call <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %114, <16 x i32> %115)
+ %117 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %116, i32 -1)
store volatile <16 x i32> %117, <16 x i32>* @Q6VecPredResult, align 64
%118 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%119 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %120 = call <512 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32> %118, <16 x i32> %119)
- %121 = bitcast <512 x i1> %120 to <16 x i32>
+ %120 = call <64 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32> %118, <16 x i32> %119)
+ %121 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %120, i32 -1)
store volatile <16 x i32> %121, <16 x i32>* @Q6VecPredResult, align 64
%122 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%123 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %124 = call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %122, <16 x i32> %123)
- %125 = bitcast <512 x i1> %124 to <16 x i32>
+ %124 = call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %122, <16 x i32> %123)
+ %125 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %124, i32 -1)
store volatile <16 x i32> %125, <16 x i32>* @Q6VecPredResult, align 64
%126 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %127 = bitcast <16 x i32> %126 to <512 x i1>
+ %127 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %126, i32 -1)
%128 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%129 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %130 = call <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1> %127, <16 x i32> %128, <16 x i32> %129)
- %131 = bitcast <512 x i1> %130 to <16 x i32>
+ %130 = call <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1> %127, <16 x i32> %128, <16 x i32> %129)
+ %131 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %130, i32 -1)
store volatile <16 x i32> %131, <16 x i32>* @Q6VecPredResult, align 64
%132 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %133 = bitcast <16 x i32> %132 to <512 x i1>
+ %133 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %132, i32 -1)
%134 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%135 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %136 = call <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1> %133, <16 x i32> %134, <16 x i32> %135)
- %137 = bitcast <512 x i1> %136 to <16 x i32>
+ %136 = call <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1> %133, <16 x i32> %134, <16 x i32> %135)
+ %137 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %136, i32 -1)
store volatile <16 x i32> %137, <16 x i32>* @Q6VecPredResult, align 64
%138 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %139 = bitcast <16 x i32> %138 to <512 x i1>
+ %139 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %138, i32 -1)
%140 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%141 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %142 = call <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1> %139, <16 x i32> %140, <16 x i32> %141)
- %143 = bitcast <512 x i1> %142 to <16 x i32>
+ %142 = call <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1> %139, <16 x i32> %140, <16 x i32> %141)
+ %143 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %142, i32 -1)
store volatile <16 x i32> %143, <16 x i32>* @Q6VecPredResult, align 64
%144 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %145 = bitcast <16 x i32> %144 to <512 x i1>
+ %145 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %144, i32 -1)
%146 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%147 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %148 = call <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1> %145, <16 x i32> %146, <16 x i32> %147)
- %149 = bitcast <512 x i1> %148 to <16 x i32>
+ %148 = call <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1> %145, <16 x i32> %146, <16 x i32> %147)
+ %149 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %148, i32 -1)
store volatile <16 x i32> %149, <16 x i32>* @Q6VecPredResult, align 64
%150 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %151 = bitcast <16 x i32> %150 to <512 x i1>
+ %151 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %150, i32 -1)
%152 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%153 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %154 = call <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1> %151, <16 x i32> %152, <16 x i32> %153)
- %155 = bitcast <512 x i1> %154 to <16 x i32>
+ %154 = call <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1> %151, <16 x i32> %152, <16 x i32> %153)
+ %155 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %154, i32 -1)
store volatile <16 x i32> %155, <16 x i32>* @Q6VecPredResult, align 64
%156 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %157 = bitcast <16 x i32> %156 to <512 x i1>
+ %157 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %156, i32 -1)
%158 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%159 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %160 = call <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1> %157, <16 x i32> %158, <16 x i32> %159)
- %161 = bitcast <512 x i1> %160 to <16 x i32>
+ %160 = call <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1> %157, <16 x i32> %158, <16 x i32> %159)
+ %161 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %160, i32 -1)
store volatile <16 x i32> %161, <16 x i32>* @Q6VecPredResult, align 64
%162 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %163 = bitcast <16 x i32> %162 to <512 x i1>
+ %163 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %162, i32 -1)
%164 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%165 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %166 = call <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1> %163, <16 x i32> %164, <16 x i32> %165)
- %167 = bitcast <512 x i1> %166 to <16 x i32>
+ %166 = call <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1> %163, <16 x i32> %164, <16 x i32> %165)
+ %167 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %166, i32 -1)
store volatile <16 x i32> %167, <16 x i32>* @Q6VecPredResult, align 64
%168 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %169 = bitcast <16 x i32> %168 to <512 x i1>
+ %169 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %168, i32 -1)
%170 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%171 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %172 = call <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1> %169, <16 x i32> %170, <16 x i32> %171)
- %173 = bitcast <512 x i1> %172 to <16 x i32>
+ %172 = call <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1> %169, <16 x i32> %170, <16 x i32> %171)
+ %173 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %172, i32 -1)
store volatile <16 x i32> %173, <16 x i32>* @Q6VecPredResult, align 64
%174 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %175 = bitcast <16 x i32> %174 to <512 x i1>
+ %175 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %174, i32 -1)
%176 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%177 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %178 = call <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1> %175, <16 x i32> %176, <16 x i32> %177)
- %179 = bitcast <512 x i1> %178 to <16 x i32>
+ %178 = call <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1> %175, <16 x i32> %176, <16 x i32> %177)
+ %179 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %178, i32 -1)
store volatile <16 x i32> %179, <16 x i32>* @Q6VecPredResult, align 64
%180 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %181 = bitcast <16 x i32> %180 to <512 x i1>
+ %181 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %180, i32 -1)
%182 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%183 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %184 = call <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1> %181, <16 x i32> %182, <16 x i32> %183)
- %185 = bitcast <512 x i1> %184 to <16 x i32>
+ %184 = call <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1> %181, <16 x i32> %182, <16 x i32> %183)
+ %185 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %184, i32 -1)
store volatile <16 x i32> %185, <16 x i32>* @Q6VecPredResult, align 64
%186 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %187 = bitcast <16 x i32> %186 to <512 x i1>
+ %187 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %186, i32 -1)
%188 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%189 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %190 = call <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1> %187, <16 x i32> %188, <16 x i32> %189)
- %191 = bitcast <512 x i1> %190 to <16 x i32>
+ %190 = call <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1> %187, <16 x i32> %188, <16 x i32> %189)
+ %191 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %190, i32 -1)
store volatile <16 x i32> %191, <16 x i32>* @Q6VecPredResult, align 64
%192 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %193 = bitcast <16 x i32> %192 to <512 x i1>
+ %193 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %192, i32 -1)
%194 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%195 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %196 = call <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1> %193, <16 x i32> %194, <16 x i32> %195)
- %197 = bitcast <512 x i1> %196 to <16 x i32>
+ %196 = call <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1> %193, <16 x i32> %194, <16 x i32> %195)
+ %197 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %196, i32 -1)
store volatile <16 x i32> %197, <16 x i32>* @Q6VecPredResult, align 64
%198 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %199 = bitcast <16 x i32> %198 to <512 x i1>
+ %199 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %198, i32 -1)
%200 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%201 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %202 = call <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1> %199, <16 x i32> %200, <16 x i32> %201)
- %203 = bitcast <512 x i1> %202 to <16 x i32>
+ %202 = call <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1> %199, <16 x i32> %200, <16 x i32> %201)
+ %203 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %202, i32 -1)
store volatile <16 x i32> %203, <16 x i32>* @Q6VecPredResult, align 64
%204 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %205 = bitcast <16 x i32> %204 to <512 x i1>
+ %205 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %204, i32 -1)
%206 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%207 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %208 = call <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1> %205, <16 x i32> %206, <16 x i32> %207)
- %209 = bitcast <512 x i1> %208 to <16 x i32>
+ %208 = call <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1> %205, <16 x i32> %206, <16 x i32> %207)
+ %209 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %208, i32 -1)
store volatile <16 x i32> %209, <16 x i32>* @Q6VecPredResult, align 64
%210 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %211 = bitcast <16 x i32> %210 to <512 x i1>
+ %211 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %210, i32 -1)
%212 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%213 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %214 = call <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1> %211, <16 x i32> %212, <16 x i32> %213)
- %215 = bitcast <512 x i1> %214 to <16 x i32>
+ %214 = call <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1> %211, <16 x i32> %212, <16 x i32> %213)
+ %215 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %214, i32 -1)
store volatile <16 x i32> %215, <16 x i32>* @Q6VecPredResult, align 64
%216 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %217 = bitcast <16 x i32> %216 to <512 x i1>
+ %217 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %216, i32 -1)
%218 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%219 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %220 = call <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1> %217, <16 x i32> %218, <16 x i32> %219)
- %221 = bitcast <512 x i1> %220 to <16 x i32>
+ %220 = call <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1> %217, <16 x i32> %218, <16 x i32> %219)
+ %221 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %220, i32 -1)
store volatile <16 x i32> %221, <16 x i32>* @Q6VecPredResult, align 64
%222 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %223 = bitcast <16 x i32> %222 to <512 x i1>
+ %223 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %222, i32 -1)
%224 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%225 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %226 = call <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1> %223, <16 x i32> %224, <16 x i32> %225)
- %227 = bitcast <512 x i1> %226 to <16 x i32>
+ %226 = call <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1> %223, <16 x i32> %224, <16 x i32> %225)
+ %227 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %226, i32 -1)
store volatile <16 x i32> %227, <16 x i32>* @Q6VecPredResult, align 64
%228 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %229 = bitcast <16 x i32> %228 to <512 x i1>
+ %229 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %228, i32 -1)
%230 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%231 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %232 = call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %229, <16 x i32> %230, <16 x i32> %231)
- %233 = bitcast <512 x i1> %232 to <16 x i32>
+ %232 = call <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1> %229, <16 x i32> %230, <16 x i32> %231)
+ %233 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %232, i32 -1)
store volatile <16 x i32> %233, <16 x i32>* @Q6VecPredResult, align 64
- %234 = call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 1)
- %235 = bitcast <512 x i1> %234 to <16 x i32>
+ %234 = call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 1)
+ %235 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %234, i32 -1)
store volatile <16 x i32> %235, <16 x i32>* @Q6VecPredResult, align 64
%236 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %237 = bitcast <16 x i32> %236 to <512 x i1>
+ %237 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %236, i32 -1)
%238 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
- %239 = bitcast <16 x i32> %238 to <512 x i1>
- %240 = call <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1> %237, <512 x i1> %239)
- %241 = bitcast <512 x i1> %240 to <16 x i32>
+ %239 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %238, i32 -1)
+ %240 = call <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1> %237, <64 x i1> %239)
+ %241 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %240, i32 -1)
store volatile <16 x i32> %241, <16 x i32>* @Q6VecPredResult, align 64
%242 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%243 = call <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32> %242)
@@ -676,8 +676,8 @@ entry:
%253 = call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %251, <16 x i32> %252, i32 -1)
store volatile <16 x i32> %253, <16 x i32>* @VectorResult, align 64
%254 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %255 = bitcast <16 x i32> %254 to <512 x i1>
- %256 = call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %255, i32 -1)
+ %255 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %254, i32 -1)
+ %256 = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %255, i32 -1)
store volatile <16 x i32> %256, <16 x i32>* @VectorResult, align 64
%257 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%258 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@@ -685,8 +685,8 @@ entry:
store volatile <16 x i32> %259, <16 x i32>* @VectorResult, align 64
%260 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%261 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %262 = bitcast <16 x i32> %261 to <512 x i1>
- %263 = call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %260, <512 x i1> %262, i32 -1)
+ %262 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %261, i32 -1)
+ %263 = call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %260, <64 x i1> %262, i32 -1)
store volatile <16 x i32> %263, <16 x i32>* @VectorResult, align 64
%264 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%265 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@@ -701,10 +701,10 @@ entry:
%272 = call <16 x i32> @llvm.hexagon.V6.vlalignb(<16 x i32> %270, <16 x i32> %271, i32 -1)
store volatile <16 x i32> %272, <16 x i32>* @VectorResult, align 64
%273 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %274 = bitcast <16 x i32> %273 to <512 x i1>
+ %274 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %273, i32 -1)
%275 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%276 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %277 = call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %274, <16 x i32> %275, <16 x i32> %276)
+ %277 = call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %274, <16 x i32> %275, <16 x i32> %276)
store volatile <16 x i32> %277, <16 x i32>* @VectorResult, align 64
%278 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%279 = call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %278)
@@ -729,28 +729,28 @@ entry:
%292 = call <16 x i32> @llvm.hexagon.V6.vd0()
store volatile <16 x i32> %292, <16 x i32>* @VectorResult, align 64
%293 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %294 = bitcast <16 x i32> %293 to <512 x i1>
+ %294 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %293, i32 -1)
%295 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%296 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %297 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %294, <16 x i32> %295, <16 x i32> %296)
+ %297 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %294, <16 x i32> %295, <16 x i32> %296)
store volatile <16 x i32> %297, <16 x i32>* @VectorResult, align 64
%298 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %299 = bitcast <16 x i32> %298 to <512 x i1>
+ %299 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %298, i32 -1)
%300 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%301 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %302 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %299, <16 x i32> %300, <16 x i32> %301)
+ %302 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %299, <16 x i32> %300, <16 x i32> %301)
store volatile <16 x i32> %302, <16 x i32>* @VectorResult, align 64
%303 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %304 = bitcast <16 x i32> %303 to <512 x i1>
+ %304 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %303, i32 -1)
%305 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%306 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %307 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %304, <16 x i32> %305, <16 x i32> %306)
+ %307 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %304, <16 x i32> %305, <16 x i32> %306)
store volatile <16 x i32> %307, <16 x i32>* @VectorResult, align 64
%308 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %309 = bitcast <16 x i32> %308 to <512 x i1>
+ %309 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %308, i32 -1)
%310 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%311 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %312 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %309, <16 x i32> %310, <16 x i32> %311)
+ %312 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %309, <16 x i32> %310, <16 x i32> %311)
store volatile <16 x i32> %312, <16 x i32>* @VectorResult, align 64
%313 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%314 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@@ -812,28 +812,28 @@ entry:
%356 = call <16 x i32> @llvm.hexagon.V6.vsubb(<16 x i32> %354, <16 x i32> %355)
store volatile <16 x i32> %356, <16 x i32>* @VectorResult, align 64
%357 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %358 = bitcast <16 x i32> %357 to <512 x i1>
+ %358 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %357, i32 -1)
%359 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%360 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %361 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %358, <16 x i32> %359, <16 x i32> %360)
+ %361 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %358, <16 x i32> %359, <16 x i32> %360)
store volatile <16 x i32> %361, <16 x i32>* @VectorResult, align 64
%362 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %363 = bitcast <16 x i32> %362 to <512 x i1>
+ %363 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %362, i32 -1)
%364 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%365 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %366 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %363, <16 x i32> %364, <16 x i32> %365)
+ %366 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %363, <16 x i32> %364, <16 x i32> %365)
store volatile <16 x i32> %366, <16 x i32>* @VectorResult, align 64
%367 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %368 = bitcast <16 x i32> %367 to <512 x i1>
+ %368 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %367, i32 -1)
%369 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%370 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %371 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %368, <16 x i32> %369, <16 x i32> %370)
+ %371 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %368, <16 x i32> %369, <16 x i32> %370)
store volatile <16 x i32> %371, <16 x i32>* @VectorResult, align 64
%372 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %373 = bitcast <16 x i32> %372 to <512 x i1>
+ %373 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %372, i32 -1)
%374 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%375 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %376 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %373, <16 x i32> %374, <16 x i32> %375)
+ %376 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %373, <16 x i32> %374, <16 x i32> %375)
store volatile <16 x i32> %376, <16 x i32>* @VectorResult, align 64
%377 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%378 = call <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32> %377)
@@ -1105,28 +1105,28 @@ entry:
%574 = call <16 x i32> @llvm.hexagon.V6.vrmpyubv.acc(<16 x i32> %571, <16 x i32> %572, <16 x i32> %573)
store volatile <16 x i32> %574, <16 x i32>* @VectorResult, align 64
%575 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %576 = bitcast <16 x i32> %575 to <512 x i1>
+ %576 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %575, i32 -1)
%577 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%578 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %579 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %576, <16 x i32> %577, <16 x i32> %578)
+ %579 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %576, <16 x i32> %577, <16 x i32> %578)
store volatile <16 x i32> %579, <16 x i32>* @VectorResult, align 64
%580 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %581 = bitcast <16 x i32> %580 to <512 x i1>
+ %581 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %580, i32 -1)
%582 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%583 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %584 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %581, <16 x i32> %582, <16 x i32> %583)
+ %584 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %581, <16 x i32> %582, <16 x i32> %583)
store volatile <16 x i32> %584, <16 x i32>* @VectorResult, align 64
%585 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %586 = bitcast <16 x i32> %585 to <512 x i1>
+ %586 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %585, i32 -1)
%587 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%588 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %589 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %586, <16 x i32> %587, <16 x i32> %588)
+ %589 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %586, <16 x i32> %587, <16 x i32> %588)
store volatile <16 x i32> %589, <16 x i32>* @VectorResult, align 64
%590 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %591 = bitcast <16 x i32> %590 to <512 x i1>
+ %591 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %590, i32 -1)
%592 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%593 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %594 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %591, <16 x i32> %592, <16 x i32> %593)
+ %594 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %591, <16 x i32> %592, <16 x i32> %593)
store volatile <16 x i32> %594, <16 x i32>* @VectorResult, align 64
%595 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%596 = call <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32> %595)
@@ -1359,10 +1359,10 @@ entry:
%764 = call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> %762, <16 x i32> %763, i32 1)
store volatile <32 x i32> %764, <32 x i32>* @VectorPairResult, align 128
%765 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %766 = bitcast <16 x i32> %765 to <512 x i1>
+ %766 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %765, i32 -1)
%767 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%768 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
- %769 = call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> %766, <16 x i32> %767, <16 x i32> %768)
+ %769 = call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> %766, <16 x i32> %767, <16 x i32> %768)
store volatile <32 x i32> %769, <32 x i32>* @VectorPairResult, align 128
%770 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
%771 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
@@ -1664,139 +1664,139 @@ entry:
}
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #1
+declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqb(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqb(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtb(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtb(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32>) #1
@@ -1814,13 +1814,13 @@ declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vdelta(<16 x i32>, <16 x i32>) #1
@@ -1832,7 +1832,7 @@ declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vlalignb(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
@@ -1856,16 +1856,16 @@ declare <16 x i32> @llvm.hexagon.V6.vxor(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vd0() #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddb(<16 x i32>, <16 x i32>) #1
@@ -1913,16 +1913,16 @@ declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubb(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #1
@@ -2138,16 +2138,16 @@ declare <16 x i32> @llvm.hexagon.V6.vrmpyub.acc(<16 x i32>, <16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vrmpyubv.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32>) #1
@@ -2318,7 +2318,7 @@ declare <32 x i32> @llvm.hexagon.V6.vdealvdd(<16 x i32>, <16 x i32>, i32) #1
declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddb.dv(<32 x i32>, <32 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/v60_sort16.ll b/llvm/test/CodeGen/Hexagon/v60_sort16.ll
index 6c4626a2390e..f54768ed3f20 100644
--- a/llvm/test/CodeGen/Hexagon/v60_sort16.ll
+++ b/llvm/test/CodeGen/Hexagon/v60_sort16.ll
@@ -60,10 +60,10 @@ b1: ; preds = %b3, %b0
b2: ; preds = %b1
%v34 = load <16 x i32>, <16 x i32>* %v11, align 64
- %v35 = bitcast <16 x i32> %v34 to <512 x i1>
+ %v35 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v34, i32 -1)
%v36 = load <16 x i32>, <16 x i32>* %v14, align 64
%v37 = load <16 x i32>, <16 x i32>* %v15, align 64
- %v38 = call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> %v35, <16 x i32> %v36, <16 x i32> %v37)
+ %v38 = call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> %v35, <16 x i32> %v36, <16 x i32> %v37)
store <32 x i32> %v38, <32 x i32>* %v13, align 128
%v39 = load <32 x i32>, <32 x i32>* %v13, align 128
%v40 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v39)
@@ -89,7 +89,7 @@ b4: ; preds = %b1
}
; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
@@ -100,5 +100,8 @@ declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vdealvdd(<16 x i32>, <16 x i32>, i32) #1
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/v60small.ll b/llvm/test/CodeGen/Hexagon/v60small.ll
index 746af018b06d..171ab28b0317 100644
--- a/llvm/test/CodeGen/Hexagon/v60small.ll
+++ b/llvm/test/CodeGen/Hexagon/v60small.ll
@@ -24,28 +24,34 @@ entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%0 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %1 = bitcast <16 x i32> %0 to <512 x i1>
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -1)
%2 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
- %3 = bitcast <16 x i32> %2 to <512 x i1>
- %4 = call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %1, <512 x i1> %3)
- %5 = bitcast <512 x i1> %4 to <16 x i32>
+ %3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %2, i32 -1)
+ %4 = call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %1, <64 x i1> %3)
+ %5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %4, i32 -1)
store volatile <16 x i32> %5, <16 x i32>* @Q6VecPredResult, align 64
%6 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
- %7 = bitcast <16 x i32> %6 to <512 x i1>
+ %7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %6, i32 -1)
%8 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
- %9 = bitcast <16 x i32> %8 to <512 x i1>
- %10 = call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %7, <512 x i1> %9)
- %11 = bitcast <512 x i1> %10 to <16 x i32>
+ %9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 -1)
+ %10 = call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %7, <64 x i1> %9)
+ %11 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %10, i32 -1)
store volatile <16 x i32> %11, <16 x i32>* @Q6VecPredResult, align 64
ret i32 0
}
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/v62-inlasm4.ll b/llvm/test/CodeGen/Hexagon/v62-inlasm4.ll
index 8831eab0a7d0..1ba41011f124 100644
--- a/llvm/test/CodeGen/Hexagon/v62-inlasm4.ll
+++ b/llvm/test/CodeGen/Hexagon/v62-inlasm4.ll
@@ -12,8 +12,9 @@ b0:
store i32 %a0, i32* %v0, align 4
store <16 x i32> %a1, <16 x i32>* %v1, align 64
%v3 = load i32, i32* %v0, align 4
- %v4 = load <16 x i32>, <16 x i32>* %v2, align 64
- call void asm sideeffect " $1 = vsetq2($0);\0A", "r,q"(i32 %v3, <16 x i32> %v4) #1
+ %v4 = tail call <64 x i1> asm sideeffect " $0 = vsetq2($1);\0A", "=q,r"(i32 %v3) #1
+ %v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
+ store <16 x i32> %v5, <16 x i32>* %v2, align 64
ret void
}
@@ -23,5 +24,7 @@ b0:
ret i32 0
}
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
+
attributes #0 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length64b" }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll b/llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll
index ec5cfe0e68fc..c5f989a88f53 100644
--- a/llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll
+++ b/llvm/test/CodeGen/Hexagon/v6vect-dbl-spill.ll
@@ -17,66 +17,66 @@ b1: ; preds = %b1, %b0
%v5 = phi i32 [ %v77, %b1 ], [ 0, %b0 ]
%v6 = phi <32 x i32>* [ undef, %b1 ], [ undef, %b0 ]
 %v7 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> undef, <32 x i32> undef)
- %v8 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v7, <32 x i32> zeroinitializer)
- %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v8, <32 x i32> undef, <32 x i32> %v0)
+ %v8 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v7, <32 x i32> zeroinitializer)
+ %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v8, <32 x i32> undef, <32 x i32> %v0)
%v10 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 3)
 %v11 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
 %v12 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v10, <32 x i32> undef)
- %v13 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
- %v14 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> zeroinitializer)
- %v15 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v13, <32 x i32> %v9, <32 x i32> %v0)
- %v16 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v14, <32 x i32> %v15, <32 x i32> %v0)
- %v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v16, <32 x i32> %v0)
- %v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v17, <32 x i32> %v0)
- %v19 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
- %v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v19, <32 x i32> %v18, <32 x i32> %v0)
- %v21 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
- %v22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> undef)
- %v23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v21, <32 x i32> undef, <32 x i32> undef)
+ %v13 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
+ %v14 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> zeroinitializer)
+ %v15 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v13, <32 x i32> %v9, <32 x i32> %v0)
+ %v16 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v14, <32 x i32> %v15, <32 x i32> %v0)
+ %v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v16, <32 x i32> %v0)
+ %v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v17, <32 x i32> %v0)
+ %v19 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+ %v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v19, <32 x i32> %v18, <32 x i32> %v0)
+ %v21 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+ %v22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> undef)
+ %v23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v21, <32 x i32> undef, <32 x i32> undef)
%v24 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v23, <32 x i32> %v22)
%v25 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> zeroinitializer, <64 x i32> %v24, i32 16843009)
- %v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v20, <32 x i32> %v0)
- %v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v26, <32 x i32> %v0)
- %v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v27, <32 x i32> %v0)
- %v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v21, <32 x i32> %v28, <32 x i32> %v0)
- %v30 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
- %v31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> zeroinitializer)
+ %v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v20, <32 x i32> %v0)
+ %v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v26, <32 x i32> %v0)
+ %v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v27, <32 x i32> %v0)
+ %v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v21, <32 x i32> %v28, <32 x i32> %v0)
+ %v30 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+ %v31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> zeroinitializer)
%v32 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v31, <32 x i32> undef)
%v33 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v25, <64 x i32> %v32, i32 16843009)
%v34 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v33, <64 x i32> undef, i32 16843009)
- %v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v29, <32 x i32> %v0)
- %v36 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v35, <32 x i32> %v0)
- %v37 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v36, <32 x i32> %v0)
- %v38 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v30, <32 x i32> %v37, <32 x i32> %v0)
+ %v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v29, <32 x i32> %v0)
+ %v36 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v35, <32 x i32> %v0)
+ %v37 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v36, <32 x i32> %v0)
+ %v38 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v30, <32 x i32> %v37, <32 x i32> %v0)
%v39 = load <32 x i32>, <32 x i32>* null, align 128, !tbaa !0
- %v40 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
- %v41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v40, <32 x i32> undef, <32 x i32> %v39)
+ %v40 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+ %v41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v40, <32 x i32> undef, <32 x i32> %v39)
%v42 = tail call <64 x i32> @llvm.hexagon.V6.vmpybus.acc.128B(<64 x i32> %v34, <32 x i32> %v41, i32 16843009)
- %v43 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v40, <32 x i32> %v38, <32 x i32> %v0)
+ %v43 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v40, <32 x i32> %v38, <32 x i32> %v0)
%v44 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> %v39, <32 x i32> undef, i32 1)
%v45 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> %v39, i32 1)
%v46 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> %v39, i32 2)
 %v47 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v44, <32 x i32> undef)
 %v48 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v45, <32 x i32> undef)
 %v49 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v46, <32 x i32> undef)
- %v50 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v47, <32 x i32> zeroinitializer)
- %v51 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v48, <32 x i32> zeroinitializer)
- %v52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v49, <32 x i32> zeroinitializer)
- %v53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v52, <32 x i32> undef, <32 x i32> %v46)
+ %v50 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v47, <32 x i32> zeroinitializer)
+ %v51 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v48, <32 x i32> zeroinitializer)
+ %v52 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v49, <32 x i32> zeroinitializer)
+ %v53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v52, <32 x i32> undef, <32 x i32> %v46)
%v54 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v42, <64 x i32> undef, i32 16843009)
%v55 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v53, <32 x i32> undef)
%v56 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v54, <64 x i32> %v55, i32 16843009)
- %v57 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v50, <32 x i32> %v43, <32 x i32> %v0)
- %v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v51, <32 x i32> %v57, <32 x i32> %v0)
- %v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v58, <32 x i32> %v0)
- %v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v52, <32 x i32> %v59, <32 x i32> %v0)
- %v61 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
+ %v57 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v50, <32 x i32> %v43, <32 x i32> %v0)
+ %v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v51, <32 x i32> %v57, <32 x i32> %v0)
+ %v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v58, <32 x i32> %v0)
+ %v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v52, <32 x i32> %v59, <32 x i32> %v0)
+ %v61 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v62 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v56, <64 x i32> undef, i32 16843009)
%v63 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v62, <64 x i32> zeroinitializer, i32 16843009)
- %v64 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v60, <32 x i32> %v0)
- %v65 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v61, <32 x i32> %v64, <32 x i32> %v0)
- %v66 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v65, <32 x i32> %v0)
- %v67 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v66, <32 x i32> %v0)
+ %v64 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v60, <32 x i32> %v0)
+ %v65 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v61, <32 x i32> %v64, <32 x i32> %v0)
+ %v66 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v65, <32 x i32> %v0)
+ %v67 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v66, <32 x i32> %v0)
%v68 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> undef, <32 x i32> %v67, <32 x i32> %v1, i32 3)
%v69 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> %v68, <32 x i32> %v67, <32 x i32> %v2, i32 4)
%v70 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> %v69, <32 x i32> %v67, <32 x i32> %v2, i32 5)
@@ -108,16 +108,16 @@ declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>) #1
 declare <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <64 x i32> @llvm.hexagon.V6.vmpybus.acc.128B(<64 x i32>, <32 x i32>, i32) #1
; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32>, <32 x i32>, i32) #1
diff --git a/llvm/test/CodeGen/Hexagon/v6vect-pred2.ll b/llvm/test/CodeGen/Hexagon/v6vect-pred2.ll
index a1c155621d5f..8be372a56c8f 100644
--- a/llvm/test/CodeGen/Hexagon/v6vect-pred2.ll
+++ b/llvm/test/CodeGen/Hexagon/v6vect-pred2.ll
@@ -19,8 +19,8 @@ b0:
%v1 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 12)
store <16 x i32> %v1, <16 x i32>* @g2, align 64, !tbaa !0
%v2 = load <16 x i32>, <16 x i32>* @g0, align 64, !tbaa !0
- %v3 = bitcast <16 x i32> %v2 to <512 x i1>
- %v4 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v3, <16 x i32> %v0, <16 x i32> %v1)
+ %v3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v2, i32 -1)
+ %v4 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v3, <16 x i32> %v0, <16 x i32> %v1)
store <16 x i32> %v4, <16 x i32>* @g3, align 64, !tbaa !0
ret i32 0
}
@@ -29,7 +29,10 @@ b0:
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll b/llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll
index 73633e6a877f..d724075a4ace 100644
--- a/llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll
+++ b/llvm/test/CodeGen/Hexagon/v6vect-spill-kill.ll
@@ -20,61 +20,61 @@ b2: ; preds = %b2, %b1
%v4 = phi <32 x i32>* [ %v3, %b1 ], [ undef, %b2 ]
%v5 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> zeroinitializer, i32 2)
 %v6 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v5, <32 x i32> zeroinitializer)
- %v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
- %v8 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v7, <32 x i32> zeroinitializer)
- %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> zeroinitializer, <32 x i32> %v8, <32 x i32> zeroinitializer)
- %v10 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v9, <32 x i32> zeroinitializer)
+ %v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
+ %v8 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v7, <32 x i32> zeroinitializer)
+ %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> zeroinitializer, <32 x i32> %v8, <32 x i32> zeroinitializer)
+ %v10 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v9, <32 x i32> zeroinitializer)
%v11 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> zeroinitializer, i32 4)
 %v12 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
 %v13 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
- %v14 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
- %v15 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> undef)
- %v16 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v13, <32 x i32> undef)
- %v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v10, <32 x i32> zeroinitializer)
- %v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v14, <32 x i32> %v17, <32 x i32> zeroinitializer)
- %v19 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v15, <32 x i32> %v18, <32 x i32> zeroinitializer)
- %v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v16, <32 x i32> %v19, <32 x i32> zeroinitializer)
+ %v14 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
+ %v15 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> undef)
+ %v16 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v13, <32 x i32> undef)
+ %v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v10, <32 x i32> zeroinitializer)
+ %v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v14, <32 x i32> %v17, <32 x i32> zeroinitializer)
+ %v19 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v15, <32 x i32> %v18, <32 x i32> zeroinitializer)
+ %v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v16, <32 x i32> %v19, <32 x i32> zeroinitializer)
%v21 = getelementptr inbounds i8, i8* null, i32 undef
%v22 = bitcast i8* %v21 to <32 x i32>*
%v23 = load <32 x i32>, <32 x i32>* %v22, align 128, !tbaa !0
 %v24 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v23, <32 x i32> zeroinitializer)
- %v25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v24, <32 x i32> undef)
- %v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v25, <32 x i32> %v20, <32 x i32> zeroinitializer)
- %v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v26, <32 x i32> zeroinitializer)
- %v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v27, <32 x i32> zeroinitializer)
- %v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v28, <32 x i32> zeroinitializer)
- %v30 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v29, <32 x i32> zeroinitializer)
- %v31 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v30, <32 x i32> zeroinitializer)
- %v32 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v31, <32 x i32> zeroinitializer)
- %v33 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v32, <32 x i32> zeroinitializer)
- %v34 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v33, <32 x i32> zeroinitializer)
- %v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v34, <32 x i32> zeroinitializer)
+ %v25 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v24, <32 x i32> undef)
+ %v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v25, <32 x i32> %v20, <32 x i32> zeroinitializer)
+ %v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v26, <32 x i32> zeroinitializer)
+ %v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v27, <32 x i32> zeroinitializer)
+ %v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v28, <32 x i32> zeroinitializer)
+ %v30 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v29, <32 x i32> zeroinitializer)
+ %v31 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v30, <32 x i32> zeroinitializer)
+ %v32 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v31, <32 x i32> zeroinitializer)
+ %v33 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v32, <32 x i32> zeroinitializer)
+ %v34 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v33, <32 x i32> zeroinitializer)
+ %v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v34, <32 x i32> zeroinitializer)
%v36 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 1)
%v37 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 1)
%v38 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 2)
 %v39 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v36, <32 x i32> zeroinitializer)
 %v40 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v37, <32 x i32> zeroinitializer)
 %v41 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v38, <32 x i32> zeroinitializer)
- %v42 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v39, <32 x i32> undef)
- %v43 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v40, <32 x i32> undef)
- %v44 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v41, <32 x i32> undef)
- %v45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> undef)
- %v46 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v42, <32 x i32> %v35, <32 x i32> zeroinitializer)
- %v47 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v43, <32 x i32> %v46, <32 x i32> zeroinitializer)
- %v48 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v44, <32 x i32> %v47, <32 x i32> zeroinitializer)
- %v49 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v45, <32 x i32> %v48, <32 x i32> zeroinitializer)
+ %v42 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v39, <32 x i32> undef)
+ %v43 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v40, <32 x i32> undef)
+ %v44 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v41, <32 x i32> undef)
+ %v45 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> undef)
+ %v46 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v42, <32 x i32> %v35, <32 x i32> zeroinitializer)
+ %v47 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v43, <32 x i32> %v46, <32 x i32> zeroinitializer)
+ %v48 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v44, <32 x i32> %v47, <32 x i32> zeroinitializer)
+ %v49 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v45, <32 x i32> %v48, <32 x i32> zeroinitializer)
%v50 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 4)
%v51 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 4)
 %v52 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
 %v53 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v50, <32 x i32> zeroinitializer)
 %v54 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v51, <32 x i32> zeroinitializer)
- %v55 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v52, <32 x i32> undef)
- %v56 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v53, <32 x i32> undef)
- %v57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v54, <32 x i32> undef)
- %v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v49, <32 x i32> zeroinitializer)
- %v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v55, <32 x i32> %v58, <32 x i32> zeroinitializer)
- %v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v56, <32 x i32> %v59, <32 x i32> zeroinitializer)
- %v61 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v57, <32 x i32> %v60, <32 x i32> zeroinitializer)
+ %v55 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v52, <32 x i32> undef)
+ %v56 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v53, <32 x i32> undef)
+ %v57 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v54, <32 x i32> undef)
+ %v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v49, <32 x i32> zeroinitializer)
+ %v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v55, <32 x i32> %v58, <32 x i32> zeroinitializer)
+ %v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v56, <32 x i32> %v59, <32 x i32> zeroinitializer)
+ %v61 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v57, <32 x i32> %v60, <32 x i32> zeroinitializer)
%v62 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> zeroinitializer, <32 x i32> %v61, <32 x i32> undef, i32 5)
%v63 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuhv.128B(<32 x i32> undef, <32 x i32> undef)
%v64 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v62)
@@ -100,10 +100,10 @@ declare <32 x i32> @llvm.hexagon.V6.vshuffh.128B(<32 x i32>) #1
 declare <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
-declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
-declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32>, <32 x i32>, i32) #1
diff --git a/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll b/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
index 08f7e33579e4..f8a12d33689b 100644
--- a/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
+++ b/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
@@ -30,20 +30,20 @@ entry:
%call1 = tail call i32 @acquire_vector_unit(i8 zeroext 0) #3
tail call void @init_vectors() #3
%0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
+ %1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
%2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
- %3 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -2147483648)
- %4 = bitcast <512 x i1> %3 to <16 x i32>
+ %3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 -2147483648)
+ %4 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %3, i32 -1)
store <16 x i32> %4, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts = tail call i32 @puts(i8* getelementptr inbounds ([106 x i8], [106 x i8]* @str, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
- %5 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -1)
- %6 = bitcast <512 x i1> %5 to <16 x i32>
+ %5 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 -1)
+ %6 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %5, i32 -1)
store <16 x i32> %6, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts5 = tail call i32 @puts(i8* getelementptr inbounds ([99 x i8], [99 x i8]* @str3, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
- %7 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 0)
- %8 = bitcast <512 x i1> %7 to <16 x i32>
+ %7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 0)
+ %8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %7, i32 -1)
store <16 x i32> %8, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts6 = tail call i32 @puts(i8* getelementptr inbounds ([98 x i8], [98 x i8]* @str4, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
@@ -57,10 +57,13 @@ declare i32 @acquire_vector_unit(i8 zeroext) #1
declare void @init_vectors() #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #2
+declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #2
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #2
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #2
diff --git a/llvm/test/CodeGen/Hexagon/vecPred2Vec.ll b/llvm/test/CodeGen/Hexagon/vecPred2Vec.ll
index c609c52f98c4..ab4f7eee1a3f 100644
--- a/llvm/test/CodeGen/Hexagon/vecPred2Vec.ll
+++ b/llvm/test/CodeGen/Hexagon/vecPred2Vec.ll
@@ -11,19 +11,18 @@ target triple = "hexagon"
define i32 @f0() #0 {
b0:
%v0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
- %v1 = bitcast <16 x i32> %v0 to <512 x i1>
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
- %v3 = bitcast <16 x i32> %v2 to <512 x i1>
- %v4 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v1, <512 x i1> %v3)
- %v5 = bitcast <512 x i1> %v4 to <16 x i32>
+ %v3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v2, i32 -1)
+ %v4 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v1, <64 x i1> %v3)
+ %v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
store <16 x i32> %v5, <16 x i32>* @g0, align 64, !tbaa !0
ret i32 0
}
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
-
-; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
+declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
diff --git a/llvm/test/CodeGen/Hexagon/vect-downscale.ll b/llvm/test/CodeGen/Hexagon/vect-downscale.ll
index 9ab6b1bee2c4..ce10b74a72a6 100644
--- a/llvm/test/CodeGen/Hexagon/vect-downscale.ll
+++ b/llvm/test/CodeGen/Hexagon/vect-downscale.ll
@@ -34,7 +34,7 @@ b0:
%v11 = mul i32 %v10, %v9
%v12 = sub i32 %a1, %v11
%v13 = lshr i32 %v12, 1
- %v14 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v13)
+ %v14 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v13)
%v15 = icmp eq i32 %a2, 0
br i1 %v15, label %b11, label %b1
@@ -132,7 +132,7 @@ b9: ; preds = %b8, %b7
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32> %v78, <16 x i32> %v76, i32 1077952576)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32> %v80, <16 x i32> %v79)
%v82 = load <16 x i32>, <16 x i32>* %v68, align 64, !tbaa !2
- %v83 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v14, <16 x i32> %v81, <16 x i32> %v82)
+ %v83 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v14, <16 x i32> %v81, <16 x i32> %v82)
store <16 x i32> %v83, <16 x i32>* %v68, align 64, !tbaa !2
br label %b10
@@ -157,7 +157,7 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare i32 @llvm.hexagon.S2.ct0(i32) #1
; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>, <16 x i32>, i32) #1
@@ -166,7 +166,7 @@ declare <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>, <16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
-declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/vector-align.ll b/llvm/test/CodeGen/Hexagon/vector-align.ll
index 043839c704ae..d2e0071700ed 100644
--- a/llvm/test/CodeGen/Hexagon/vector-align.ll
+++ b/llvm/test/CodeGen/Hexagon/vector-align.ll
@@ -1,5 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b < %s \
-; RUN: | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that the store to Q6VecPredResult does not get expanded into multiple
; stores. There should be no memd's. This relies on the alignment specified
@@ -11,25 +10,23 @@
@Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64
-; Function Attrs: nounwind
define i32 @foo() #0 {
entry:
- %0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
- %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -2147483648)
- store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @Q6VecPredResult to <512 x i1>*), align 64, !tbaa !1
+ %v0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+ %v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -2147483648)
+ %v2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v1, i32 -1)
+ store <16 x i32> %v2, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
tail call void @print_vecpred(i32 64, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
ret i32 0
}
-; Function Attrs: nounwind readnone
-declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
-
-; Function Attrs: nounwind readnone
+declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare void @print_vecpred(i32, i8*) #2
-attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" }
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length64b" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/Hexagon/vselect-pseudo.ll b/llvm/test/CodeGen/Hexagon/vselect-pseudo.ll
index e6be3ee69c04..58fe4ad6675a 100644
--- a/llvm/test/CodeGen/Hexagon/vselect-pseudo.ll
+++ b/llvm/test/CodeGen/Hexagon/vselect-pseudo.ll
@@ -12,7 +12,7 @@ for.body9.us:
%cmp10.us = icmp eq i32 0, undef
%.h63h32.2.us = select i1 %cmp10.us, <16 x i32> zeroinitializer, <16 x i32> undef
%0 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %.h63h32.2.us, <16 x i32> undef, i32 2)
- %1 = tail call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> undef, <16 x i32> undef, <16 x i32> %0)
+ %1 = tail call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> undef, <16 x i32> undef, <16 x i32> %0)
%2 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %1)
%3 = tail call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> undef, <16 x i32> %2, i32 62)
%4 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %3)
@@ -24,7 +24,7 @@ for.body43.us.preheader: ; preds = %for.body9.us
}
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #1
-declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #1