[llvm] deda60f - [Hexagon] Add HVX intrinsics for conditional vector loads/stores
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 22 09:49:49 PDT 2021
Author: Krzysztof Parzyszek
Date: 2021-04-22T11:49:29-05:00
New Revision: deda60fcaf0be162e893ff68d8d91355e3ac5542
URL: https://github.com/llvm/llvm-project/commit/deda60fcaf0be162e893ff68d8d91355e3ac5542
DIFF: https://github.com/llvm/llvm-project/commit/deda60fcaf0be162e893ff68d8d91355e3ac5542.diff
LOG: [Hexagon] Add HVX intrinsics for conditional vector loads/stores
Intrinsics for the following instructions are added. The intrinsic
name is "int_hexagon_<inst>[_128B]", e.g.
int_hexagon_V6_vL32b_pred_ai for 64-byte version
int_hexagon_V6_vL32b_pred_ai_128B for 128-byte version
V6_vL32b_pred_ai if (Pv4) Vd32 = vmem(Rt32+#s4)
V6_vL32b_pred_pi if (Pv4) Vd32 = vmem(Rx32++#s3)
V6_vL32b_pred_ppu if (Pv4) Vd32 = vmem(Rx32++Mu2)
V6_vL32b_npred_ai if (!Pv4) Vd32 = vmem(Rt32+#s4)
V6_vL32b_npred_pi if (!Pv4) Vd32 = vmem(Rx32++#s3)
V6_vL32b_npred_ppu if (!Pv4) Vd32 = vmem(Rx32++Mu2)
V6_vL32b_nt_pred_ai if (Pv4) Vd32 = vmem(Rt32+#s4):nt
V6_vL32b_nt_pred_pi if (Pv4) Vd32 = vmem(Rx32++#s3):nt
V6_vL32b_nt_pred_ppu if (Pv4) Vd32 = vmem(Rx32++Mu2):nt
V6_vL32b_nt_npred_ai if (!Pv4) Vd32 = vmem(Rt32+#s4):nt
V6_vL32b_nt_npred_pi if (!Pv4) Vd32 = vmem(Rx32++#s3):nt
V6_vL32b_nt_npred_ppu if (!Pv4) Vd32 = vmem(Rx32++Mu2):nt
V6_vS32b_pred_ai if (Pv4) vmem(Rt32+#s4) = Vs32
V6_vS32b_pred_pi if (Pv4) vmem(Rx32++#s3) = Vs32
V6_vS32b_pred_ppu if (Pv4) vmem(Rx32++Mu2) = Vs32
V6_vS32b_npred_ai if (!Pv4) vmem(Rt32+#s4) = Vs32
V6_vS32b_npred_pi if (!Pv4) vmem(Rx32++#s3) = Vs32
V6_vS32b_npred_ppu if (!Pv4) vmem(Rx32++Mu2) = Vs32
V6_vS32Ub_pred_ai if (Pv4) vmemu(Rt32+#s4) = Vs32
V6_vS32Ub_pred_pi if (Pv4) vmemu(Rx32++#s3) = Vs32
V6_vS32Ub_pred_ppu if (Pv4) vmemu(Rx32++Mu2) = Vs32
V6_vS32Ub_npred_ai if (!Pv4) vmemu(Rt32+#s4) = Vs32
V6_vS32Ub_npred_pi if (!Pv4) vmemu(Rx32++#s3) = Vs32
V6_vS32Ub_npred_ppu if (!Pv4) vmemu(Rx32++Mu2) = Vs32
V6_vS32b_nt_pred_ai if (Pv4) vmem(Rt32+#s4):nt = Vs32
V6_vS32b_nt_pred_pi if (Pv4) vmem(Rx32++#s3):nt = Vs32
V6_vS32b_nt_pred_ppu if (Pv4) vmem(Rx32++Mu2):nt = Vs32
V6_vS32b_nt_npred_ai if (!Pv4) vmem(Rt32+#s4):nt = Vs32
V6_vS32b_nt_npred_pi if (!Pv4) vmem(Rx32++#s3):nt = Vs32
V6_vS32b_nt_npred_ppu if (!Pv4) vmem(Rx32++Mu2):nt = Vs32
Added:
llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-128b.ll
llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-64b.ll
Modified:
llvm/include/llvm/IR/IntrinsicsHexagon.td
llvm/lib/Target/Hexagon/HexagonIntrinsics.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td
index fe16a361ba3d6..212262c287063 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -253,6 +253,124 @@ Hexagon_v32i32_v32i32v16i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc">;
def int_hexagon_V6_vrmpyub_rtt_acc_128B :
Hexagon_v64i32_v64i32v32i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
+// HVX conditional loads/stores
+
+class Hexagon_pred_vload_imm<LLVMType ValTy>
+ : Hexagon_NonGCC_Intrinsic<
+ [ValTy],
+ [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
+
+class Hexagon_pred_vload_imm_64B: Hexagon_pred_vload_imm<llvm_v16i32_ty>;
+class Hexagon_pred_vload_imm_128B: Hexagon_pred_vload_imm<llvm_v32i32_ty>;
+
+def int_hexagon_V6_vL32b_pred_ai: Hexagon_pred_vload_imm_64B;
+def int_hexagon_V6_vL32b_npred_ai: Hexagon_pred_vload_imm_64B;
+def int_hexagon_V6_vL32b_nt_pred_ai: Hexagon_pred_vload_imm_64B;
+def int_hexagon_V6_vL32b_nt_npred_ai: Hexagon_pred_vload_imm_64B;
+def int_hexagon_V6_vL32b_pred_ai_128B: Hexagon_pred_vload_imm_128B;
+def int_hexagon_V6_vL32b_npred_ai_128B: Hexagon_pred_vload_imm_128B;
+def int_hexagon_V6_vL32b_nt_pred_ai_128B: Hexagon_pred_vload_imm_128B;
+def int_hexagon_V6_vL32b_nt_npred_ai_128B: Hexagon_pred_vload_imm_128B;
+
+class Hexagon_pred_vload_upd<LLVMType ValTy, bit TakesImm>
+ : Hexagon_NonGCC_Intrinsic<
+ [ValTy, LLVMPointerType<ValTy>],
+ [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty],
+ !if(TakesImm,
+ [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>],
+ [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>])>;
+
+class Hexagon_pred_vload_upd_64B<bit TakesImm>
+ : Hexagon_pred_vload_upd<llvm_v16i32_ty, TakesImm>;
+class Hexagon_pred_vload_upd_128B<bit TakesImm>
+ : Hexagon_pred_vload_upd<llvm_v32i32_ty, TakesImm>;
+
+def int_hexagon_V6_vL32b_pred_pi: Hexagon_pred_vload_upd_64B<1>;
+def int_hexagon_V6_vL32b_npred_pi: Hexagon_pred_vload_upd_64B<1>;
+def int_hexagon_V6_vL32b_nt_pred_pi: Hexagon_pred_vload_upd_64B<1>;
+def int_hexagon_V6_vL32b_nt_npred_pi: Hexagon_pred_vload_upd_64B<1>;
+def int_hexagon_V6_vL32b_pred_pi_128B: Hexagon_pred_vload_upd_128B<1>;
+def int_hexagon_V6_vL32b_npred_pi_128B: Hexagon_pred_vload_upd_128B<1>;
+def int_hexagon_V6_vL32b_nt_pred_pi_128B: Hexagon_pred_vload_upd_128B<1>;
+def int_hexagon_V6_vL32b_nt_npred_pi_128B: Hexagon_pred_vload_upd_128B<1>;
+
+def int_hexagon_V6_vL32b_pred_ppu: Hexagon_pred_vload_upd_64B<0>;
+def int_hexagon_V6_vL32b_npred_ppu: Hexagon_pred_vload_upd_64B<0>;
+def int_hexagon_V6_vL32b_nt_pred_ppu: Hexagon_pred_vload_upd_64B<0>;
+def int_hexagon_V6_vL32b_nt_npred_ppu: Hexagon_pred_vload_upd_64B<0>;
+def int_hexagon_V6_vL32b_pred_ppu_128B: Hexagon_pred_vload_upd_128B<0>;
+def int_hexagon_V6_vL32b_npred_ppu_128B: Hexagon_pred_vload_upd_128B<0>;
+def int_hexagon_V6_vL32b_nt_pred_ppu_128B: Hexagon_pred_vload_upd_128B<0>;
+def int_hexagon_V6_vL32b_nt_npred_ppu_128B: Hexagon_pred_vload_upd_128B<0>;
+
+
+class Hexagon_pred_vstore_imm<LLVMType ValTy>
+ : Hexagon_NonGCC_Intrinsic<
+ [],
+ [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty, ValTy],
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
+
+class Hexagon_pred_vstore_imm_64B: Hexagon_pred_vstore_imm<llvm_v16i32_ty>;
+class Hexagon_pred_vstore_imm_128B: Hexagon_pred_vstore_imm<llvm_v32i32_ty>;
+
+def int_hexagon_V6_vS32b_pred_ai: Hexagon_pred_vstore_imm_64B;
+def int_hexagon_V6_vS32b_npred_ai: Hexagon_pred_vstore_imm_64B;
+def int_hexagon_V6_vS32Ub_pred_ai: Hexagon_pred_vstore_imm_64B;
+def int_hexagon_V6_vS32Ub_npred_ai: Hexagon_pred_vstore_imm_64B;
+def int_hexagon_V6_vS32b_nt_pred_ai: Hexagon_pred_vstore_imm_64B;
+def int_hexagon_V6_vS32b_nt_npred_ai: Hexagon_pred_vstore_imm_64B;
+def int_hexagon_V6_vS32b_pred_ai_128B: Hexagon_pred_vstore_imm_128B;
+def int_hexagon_V6_vS32b_npred_ai_128B: Hexagon_pred_vstore_imm_128B;
+def int_hexagon_V6_vS32Ub_pred_ai_128B: Hexagon_pred_vstore_imm_128B;
+def int_hexagon_V6_vS32Ub_npred_ai_128B: Hexagon_pred_vstore_imm_128B;
+def int_hexagon_V6_vS32b_nt_pred_ai_128B: Hexagon_pred_vstore_imm_128B;
+def int_hexagon_V6_vS32b_nt_npred_ai_128B: Hexagon_pred_vstore_imm_128B;
+
+class Hexagon_pred_vstore_upd<LLVMType ValTy, bit TakesImm>
+ : Hexagon_NonGCC_Intrinsic<
+ [LLVMPointerType<ValTy>],
+ [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty, ValTy],
+ !if(TakesImm,
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>],
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>])>;
+
+class Hexagon_pred_vstore_upd_64B<bit TakesImm>
+ : Hexagon_pred_vstore_upd<llvm_v16i32_ty, TakesImm>;
+class Hexagon_pred_vstore_upd_128B<bit TakesImm>
+ : Hexagon_pred_vstore_upd<llvm_v32i32_ty, TakesImm>;
+
+def int_hexagon_V6_vS32b_pred_pi: Hexagon_pred_vstore_upd_64B<1>;
+def int_hexagon_V6_vS32b_npred_pi: Hexagon_pred_vstore_upd_64B<1>;
+def int_hexagon_V6_vS32Ub_pred_pi: Hexagon_pred_vstore_upd_64B<1>;
+def int_hexagon_V6_vS32Ub_npred_pi: Hexagon_pred_vstore_upd_64B<1>;
+def int_hexagon_V6_vS32b_nt_pred_pi: Hexagon_pred_vstore_upd_64B<1>;
+def int_hexagon_V6_vS32b_nt_npred_pi: Hexagon_pred_vstore_upd_64B<1>;
+def int_hexagon_V6_vS32b_pred_pi_128B: Hexagon_pred_vstore_upd_128B<1>;
+def int_hexagon_V6_vS32b_npred_pi_128B: Hexagon_pred_vstore_upd_128B<1>;
+def int_hexagon_V6_vS32Ub_pred_pi_128B: Hexagon_pred_vstore_upd_128B<1>;
+def int_hexagon_V6_vS32Ub_npred_pi_128B: Hexagon_pred_vstore_upd_128B<1>;
+def int_hexagon_V6_vS32b_nt_pred_pi_128B: Hexagon_pred_vstore_upd_128B<1>;
+def int_hexagon_V6_vS32b_nt_npred_pi_128B: Hexagon_pred_vstore_upd_128B<1>;
+
+def int_hexagon_V6_vS32b_pred_ppu: Hexagon_pred_vstore_upd_64B<0>;
+def int_hexagon_V6_vS32b_npred_ppu: Hexagon_pred_vstore_upd_64B<0>;
+def int_hexagon_V6_vS32Ub_pred_ppu: Hexagon_pred_vstore_upd_64B<0>;
+def int_hexagon_V6_vS32Ub_npred_ppu: Hexagon_pred_vstore_upd_64B<0>;
+def int_hexagon_V6_vS32b_nt_pred_ppu: Hexagon_pred_vstore_upd_64B<0>;
+def int_hexagon_V6_vS32b_nt_npred_ppu: Hexagon_pred_vstore_upd_64B<0>;
+def int_hexagon_V6_vS32b_pred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>;
+def int_hexagon_V6_vS32b_npred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>;
+def int_hexagon_V6_vS32Ub_pred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>;
+def int_hexagon_V6_vS32Ub_npred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>;
+def int_hexagon_V6_vS32b_nt_pred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>;
+def int_hexagon_V6_vS32b_nt_npred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>;
+
+
// HVX Vector predicate casts.
// These intrinsics do not emit (nor do they correspond to) any instructions,
// they are no-ops.
@@ -265,6 +383,8 @@ Hexagon_NonGCC_Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
// Masked vector stores
//
+// These are all deprecated, the intrinsics matching instruction names
+// should be used instead, e.g. int_hexagon_V6_vS32b_qpred_ai, etc.
class Hexagon_custom_vms_Intrinsic
: Hexagon_NonGCC_Intrinsic<
diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 10d0261a95dd4..68d3e6fd4961b 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -402,4 +402,74 @@ def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxW
def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
(V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+multiclass T_pRI_pat<InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID PredRegs:$P, IntRegs:$R, timm:$s),
+ (MI PredRegs:$P, IntRegs:$R, imm:$s)>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")
+ PredRegs:$P, IntRegs:$R, timm:$s),
+ (MI PredRegs:$P, IntRegs:$R, imm:$s)>;
+}
+
+multiclass T_pRM_pat<InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID PredRegs:$P, IntRegs:$R, ModRegs:$M),
+ (MI PredRegs:$P, IntRegs:$R, ModRegs:$M)>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")
+ PredRegs:$P, IntRegs:$R, ModRegs:$M),
+ (MI PredRegs:$P, IntRegs:$R, ModRegs:$M)>;
+}
+
+let Predicates = [HasV62, UseHVX] in {
+ defm: T_pRI_pat<V6_vL32b_pred_ai, int_hexagon_V6_vL32b_pred_ai>;
+ defm: T_pRI_pat<V6_vL32b_npred_ai, int_hexagon_V6_vL32b_npred_ai>;
+ defm: T_pRI_pat<V6_vL32b_pred_pi, int_hexagon_V6_vL32b_pred_pi>;
+ defm: T_pRI_pat<V6_vL32b_npred_pi, int_hexagon_V6_vL32b_npred_pi>;
+ defm: T_pRI_pat<V6_vL32b_nt_pred_ai, int_hexagon_V6_vL32b_nt_pred_ai>;
+ defm: T_pRI_pat<V6_vL32b_nt_npred_ai, int_hexagon_V6_vL32b_nt_npred_ai>;
+ defm: T_pRI_pat<V6_vL32b_nt_pred_pi, int_hexagon_V6_vL32b_nt_pred_pi>;
+ defm: T_pRI_pat<V6_vL32b_nt_npred_pi, int_hexagon_V6_vL32b_nt_npred_pi>;
+
+ defm: T_pRM_pat<V6_vL32b_pred_ppu, int_hexagon_V6_vL32b_pred_ppu>;
+ defm: T_pRM_pat<V6_vL32b_npred_ppu, int_hexagon_V6_vL32b_npred_ppu>;
+ defm: T_pRM_pat<V6_vL32b_nt_pred_ppu, int_hexagon_V6_vL32b_nt_pred_ppu>;
+ defm: T_pRM_pat<V6_vL32b_nt_npred_ppu, int_hexagon_V6_vL32b_nt_npred_ppu>;
+}
+
+multiclass T_pRIV_pat<InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID PredRegs:$P, IntRegs:$R, timm:$s, HvxVR:$V),
+ (MI PredRegs:$P, IntRegs:$R, imm:$s, HvxVR:$V)>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")
+ PredRegs:$P, IntRegs:$R, timm:$s, HvxVR:$V),
+ (MI PredRegs:$P, IntRegs:$R, imm:$s, HvxVR:$V)>;
+}
+
+multiclass T_pRMV_pat<InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V),
+ (MI PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V)>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")
+ PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V),
+ (MI PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V)>;
+}
+
+let Predicates = [HasV60, UseHVX] in {
+ defm: T_pRIV_pat<V6_vS32b_pred_ai, int_hexagon_V6_vS32b_pred_ai>;
+ defm: T_pRIV_pat<V6_vS32b_npred_ai, int_hexagon_V6_vS32b_npred_ai>;
+ defm: T_pRIV_pat<V6_vS32b_pred_pi, int_hexagon_V6_vS32b_pred_pi>;
+ defm: T_pRIV_pat<V6_vS32b_npred_pi, int_hexagon_V6_vS32b_npred_pi>;
+ defm: T_pRIV_pat<V6_vS32Ub_pred_ai, int_hexagon_V6_vS32Ub_pred_ai>;
+ defm: T_pRIV_pat<V6_vS32Ub_npred_ai, int_hexagon_V6_vS32Ub_npred_ai>;
+ defm: T_pRIV_pat<V6_vS32Ub_pred_pi, int_hexagon_V6_vS32Ub_pred_pi>;
+ defm: T_pRIV_pat<V6_vS32Ub_npred_pi, int_hexagon_V6_vS32Ub_npred_pi>;
+ defm: T_pRIV_pat<V6_vS32b_nt_pred_ai, int_hexagon_V6_vS32b_nt_pred_ai>;
+ defm: T_pRIV_pat<V6_vS32b_nt_npred_ai, int_hexagon_V6_vS32b_nt_npred_ai>;
+ defm: T_pRIV_pat<V6_vS32b_nt_pred_pi, int_hexagon_V6_vS32b_nt_pred_pi>;
+ defm: T_pRIV_pat<V6_vS32b_nt_npred_pi, int_hexagon_V6_vS32b_nt_npred_pi>;
+
+ defm: T_pRMV_pat<V6_vS32b_pred_ppu, int_hexagon_V6_vS32b_pred_ppu>;
+ defm: T_pRMV_pat<V6_vS32b_npred_ppu, int_hexagon_V6_vS32b_npred_ppu>;
+ defm: T_pRMV_pat<V6_vS32Ub_pred_ppu, int_hexagon_V6_vS32Ub_pred_ppu>;
+ defm: T_pRMV_pat<V6_vS32Ub_npred_ppu, int_hexagon_V6_vS32Ub_npred_ppu>;
+ defm: T_pRMV_pat<V6_vS32b_nt_pred_ppu, int_hexagon_V6_vS32b_nt_pred_ppu>;
+ defm: T_pRMV_pat<V6_vS32b_nt_npred_ppu, int_hexagon_V6_vS32b_nt_npred_ppu>;
+}
+
include "HexagonDepMapAsm2Intrin.td"
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-128b.ll
new file mode 100644
index 0000000000000..c7f052b7a6dba
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-128b.ll
@@ -0,0 +1,679 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+declare <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1, <32 x i32>*, i32)
+declare <32 x i32> @llvm.hexagon.V6.vL32b.npred.ai.128B(i1, <32 x i32>*, i32)
+declare <32 x i32> @llvm.hexagon.V6.vL32b.nt.pred.ai.128B(i1, <32 x i32>*, i32)
+declare <32 x i32> @llvm.hexagon.V6.vL32b.nt.npred.ai.128B(i1, <32 x i32>*, i32)
+
+declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.pred.pi.128B(i1, <32 x i32>*, i32)
+declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.npred.pi.128B(i1, <32 x i32>*, i32)
+declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.pi.128B(i1, <32 x i32>*, i32)
+declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.pi.128B(i1, <32 x i32>*, i32)
+
+declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.pred.ppu.128B(i1, <32 x i32>*, i32)
+declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.npred.ppu.128B(i1, <32 x i32>*, i32)
+declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.ppu.128B(i1, <32 x i32>*, i32)
+declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.ppu.128B(i1, <32 x i32>*, i32)
+
+declare void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vS32b.npred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vS32Ub.pred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vS32Ub.npred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vS32b.nt.pred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare void @llvm.hexagon.V6.vS32b.nt.npred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>)
+
+declare <32 x i32>* @llvm.hexagon.V6.vS32b.pred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32b.npred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32Ub.pred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32Ub.npred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>)
+
+declare <32 x i32>* @llvm.hexagon.V6.vS32b.pred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32b.npred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32Ub.pred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32Ub.npred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>)
+declare <32 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>)
+
+
+define <32 x i32> @f0(i32 %a0, <32 x i32>* %a1) #0 {
+; CHECK-LABEL: f0:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r1+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 384)
+ ret <32 x i32> %v1
+}
+
+define <32 x i32> @f1(i32 %a0, <32 x i32>* %a1) #0 {
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r1+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32> @llvm.hexagon.V6.vL32b.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 384)
+ ret <32 x i32> %v1
+}
+
+define <32 x i32> @f2(i32 %a0, <32 x i32>* %a1) #0 {
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r1+#3):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32> @llvm.hexagon.V6.vL32b.nt.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 384)
+ ret <32 x i32> %v1
+}
+
+define <32 x i32> @f3(i32 %a0, <32 x i32>* %a1) #0 {
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r1+#3):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32> @llvm.hexagon.V6.vL32b.nt.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 384)
+ ret <32 x i32> %v1
+}
+
+define <32 x i32>* @f4(i32 %a0, <32 x i32>* %a1) #0 {
+; CHECK-LABEL: f4:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r0++#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 384)
+ %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1
+ ret <32 x i32>* %v2
+}
+
+define <32 x i32>* @f5(i32 %a0, <32 x i32>* %a1) #0 {
+; CHECK-LABEL: f5:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r0++#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 384)
+ %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1
+ ret <32 x i32>* %v2
+}
+
+define <32 x i32>* @f6(i32 %a0, <32 x i32>* %a1) #0 {
+; CHECK-LABEL: f6:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r0++#3):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 384)
+ %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1
+ ret <32 x i32>* %v2
+}
+
+define <32 x i32>* @f7(i32 %a0, <32 x i32>* %a1) #0 {
+; CHECK-LABEL: f7:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r0++#3):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 384)
+ %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1
+ ret <32 x i32>* %v2
+}
+
+define <32 x i32>* @f8(i32 %a0, <32 x i32>* %a1, i32 %a2) #0 {
+; CHECK-LABEL: f8:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r0++m0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2)
+ %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1
+ ret <32 x i32>* %v2
+}
+
+define <32 x i32>* @f9(i32 %a0, <32 x i32>* %a1, i32 %a2) #0 {
+; CHECK-LABEL: f9:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r0++m0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2)
+ %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1
+ ret <32 x i32>* %v2
+}
+
+define <32 x i32>* @f10(i32 %a0, <32 x i32>* %a1, i32 %a2) #0 {
+; CHECK-LABEL: f10:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r0++m0):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2)
+ %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1
+ ret <32 x i32>* %v2
+}
+
+define <32 x i32>* @f11(i32 %a0, <32 x i32>* %a1, i32 %a2) #0 {
+; CHECK-LABEL: f11:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r0++m0):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2)
+ %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1
+ ret <32 x i32>* %v2
+}
+
+define void @f12(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f12:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r1+#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret void
+}
+
+define void @f13(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f13:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r1+#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32b.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret void
+}
+
+define void @f14(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f14:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmemu(r1+#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32Ub.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret void
+}
+
+define void @f15(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f15:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmemu(r1+#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32Ub.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret void
+}
+
+define void @f16(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f16:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r1+#-3):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32b.nt.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret void
+}
+
+define void @f17(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f17:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r1+#-3):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32b.nt.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret void
+}
+
+define <32 x i32>* @f18(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f18:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r0++#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f19(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f19:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r0++#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f20(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f20:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmemu(r0++#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32Ub.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f21(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f21:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmemu(r0++#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32Ub.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f22(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f22:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r0++#-3):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f23(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 {
+; CHECK-LABEL: f23:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r0++#-3):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f24(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 {
+; CHECK-LABEL: f24:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r0++m0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f25(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 {
+; CHECK-LABEL: f25:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r0++m0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f26(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 {
+; CHECK-LABEL: f26:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmemu(r0++m0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32Ub.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f27(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 {
+; CHECK-LABEL: f27:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmemu(r0++m0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32Ub.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f28(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 {
+; CHECK-LABEL: f28:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r0++m0):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3)
+ ret <32 x i32>* %v1
+}
+
+define <32 x i32>* @f29(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 {
+; CHECK-LABEL: f29:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r0++m0):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3)
+ ret <32 x i32>* %v1
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length128b,-packets" }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-64b.ll b/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-64b.ll
new file mode 100644
index 0000000000000..8fd6f264d838f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-64b.ll
@@ -0,0 +1,679 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+declare <16 x i32> @llvm.hexagon.V6.vL32b.pred.ai(i1, <16 x i32>*, i32)
+declare <16 x i32> @llvm.hexagon.V6.vL32b.npred.ai(i1, <16 x i32>*, i32)
+declare <16 x i32> @llvm.hexagon.V6.vL32b.nt.pred.ai(i1, <16 x i32>*, i32)
+declare <16 x i32> @llvm.hexagon.V6.vL32b.nt.npred.ai(i1, <16 x i32>*, i32)
+
+declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.pred.pi(i1, <16 x i32>*, i32)
+declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.npred.pi(i1, <16 x i32>*, i32)
+declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.pi(i1, <16 x i32>*, i32)
+declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.pi(i1, <16 x i32>*, i32)
+
+declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.pred.ppu(i1, <16 x i32>*, i32)
+declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.npred.ppu(i1, <16 x i32>*, i32)
+declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.ppu(i1, <16 x i32>*, i32)
+declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.ppu(i1, <16 x i32>*, i32)
+
+declare void @llvm.hexagon.V6.vS32b.pred.ai(i1, <16 x i32>*, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vS32b.npred.ai(i1, <16 x i32>*, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vS32Ub.pred.ai(i1, <16 x i32>*, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vS32Ub.npred.ai(i1, <16 x i32>*, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vS32b.nt.pred.ai(i1, <16 x i32>*, i32, <16 x i32>)
+declare void @llvm.hexagon.V6.vS32b.nt.npred.ai(i1, <16 x i32>*, i32, <16 x i32>)
+
+declare <16 x i32>* @llvm.hexagon.V6.vS32b.pred.pi(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32b.npred.pi(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32Ub.pred.pi(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32Ub.npred.pi(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.pi(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.pi(i1, <16 x i32>*, i32, <16 x i32>)
+
+declare <16 x i32>* @llvm.hexagon.V6.vS32b.pred.ppu(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32b.npred.ppu(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32Ub.pred.ppu(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32Ub.npred.ppu(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.ppu(i1, <16 x i32>*, i32, <16 x i32>)
+declare <16 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.ppu(i1, <16 x i32>*, i32, <16 x i32>)
+
+
+define <16 x i32> @f0(i32 %a0, <16 x i32>* %a1) #0 {
+; CHECK-LABEL: f0:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r1+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32> @llvm.hexagon.V6.vL32b.pred.ai(i1 %v0, <16 x i32>* %a1, i32 192)
+ ret <16 x i32> %v1
+}
+
+define <16 x i32> @f1(i32 %a0, <16 x i32>* %a1) #0 {
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r1+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32> @llvm.hexagon.V6.vL32b.npred.ai(i1 %v0, <16 x i32>* %a1, i32 192)
+ ret <16 x i32> %v1
+}
+
+define <16 x i32> @f2(i32 %a0, <16 x i32>* %a1) #0 {
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r1+#3):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32> @llvm.hexagon.V6.vL32b.nt.pred.ai(i1 %v0, <16 x i32>* %a1, i32 192)
+ ret <16 x i32> %v1
+}
+
+define <16 x i32> @f3(i32 %a0, <16 x i32>* %a1) #0 {
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r1+#3):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32> @llvm.hexagon.V6.vL32b.nt.npred.ai(i1 %v0, <16 x i32>* %a1, i32 192)
+ ret <16 x i32> %v1
+}
+
+define <16 x i32>* @f4(i32 %a0, <16 x i32>* %a1) #0 {
+; CHECK-LABEL: f4:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r0++#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.pred.pi(i1 %v0, <16 x i32>* %a1, i32 192)
+ %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1
+ ret <16 x i32>* %v2
+}
+
+define <16 x i32>* @f5(i32 %a0, <16 x i32>* %a1) #0 {
+; CHECK-LABEL: f5:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r0++#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.npred.pi(i1 %v0, <16 x i32>* %a1, i32 192)
+ %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1
+ ret <16 x i32>* %v2
+}
+
+define <16 x i32>* @f6(i32 %a0, <16 x i32>* %a1) #0 {
+; CHECK-LABEL: f6:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r0++#3):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.pi(i1 %v0, <16 x i32>* %a1, i32 192)
+ %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1
+ ret <16 x i32>* %v2
+}
+
+define <16 x i32>* @f7(i32 %a0, <16 x i32>* %a1) #0 {
+; CHECK-LABEL: f7:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r0++#3):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.pi(i1 %v0, <16 x i32>* %a1, i32 192)
+ %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1
+ ret <16 x i32>* %v2
+}
+
+define <16 x i32>* @f8(i32 %a0, <16 x i32>* %a1, i32 %a2) #0 {
+; CHECK-LABEL: f8:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r0++m0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2)
+ %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1
+ ret <16 x i32>* %v2
+}
+
+define <16 x i32>* @f9(i32 %a0, <16 x i32>* %a1, i32 %a2) #0 {
+; CHECK-LABEL: f9:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r0++m0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2)
+ %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1
+ ret <16 x i32>* %v2
+}
+
+define <16 x i32>* @f10(i32 %a0, <16 x i32>* %a1, i32 %a2) #0 {
+; CHECK-LABEL: f10:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) v0 = vmem(r0++m0):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2)
+ %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1
+ ret <16 x i32>* %v2
+}
+
+define <16 x i32>* @f11(i32 %a0, <16 x i32>* %a1, i32 %a2) #0 {
+; CHECK-LABEL: f11:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) v0 = vmem(r0++m0):nt
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2)
+ %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1
+ ret <16 x i32>* %v2
+}
+
+define void @f12(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f12:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r1+#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32b.pred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret void
+}
+
+define void @f13(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f13:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r1+#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32b.npred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret void
+}
+
+define void @f14(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f14:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmemu(r1+#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32Ub.pred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret void
+}
+
+define void @f15(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f15:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmemu(r1+#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32Ub.npred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret void
+}
+
+define void @f16(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f16:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r1+#-3):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32b.nt.pred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret void
+}
+
+define void @f17(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f17:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r1+#-3):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ call void @llvm.hexagon.V6.vS32b.nt.npred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret void
+}
+
+define <16 x i32>* @f18(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f18:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r0++#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.pred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f19(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f19:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r0++#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.npred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f20(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f20:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmemu(r0++#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32Ub.pred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f21(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f21:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmemu(r0++#-3) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32Ub.npred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f22(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f22:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r0++#-3):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f23(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 {
+; CHECK-LABEL: f23:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r0++#-3):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f24(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 {
+; CHECK-LABEL: f24:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r0++m0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f25(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 {
+; CHECK-LABEL: f25:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r0++m0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f26(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 {
+; CHECK-LABEL: f26:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmemu(r0++m0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32Ub.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f27(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 {
+; CHECK-LABEL: f27:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmemu(r0++m0) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32Ub.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f28(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 {
+; CHECK-LABEL: f28:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) vmem(r0++m0):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3)
+ ret <16 x i32>* %v1
+}
+
+define <16 x i32>* @f29(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 {
+; CHECK-LABEL: f29:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: m0 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) vmem(r0++m0):nt = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = icmp eq i32 %a0, 0
+ %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3)
+ ret <16 x i32>* %v1
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length64b,-packets" }
More information about the llvm-commits
mailing list