[llvm] 6a6ac3b - [Hexagon] Support BUILD_VECTOR of floating point HVX vectors
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 28 15:01:29 PST 2021
Author: Krzysztof Parzyszek
Date: 2021-12-28T14:59:08-08:00
New Revision: 6a6ac3b36fcdb44a5096f2ddab952a1281eb144e
URL: https://github.com/llvm/llvm-project/commit/6a6ac3b36fcdb44a5096f2ddab952a1281eb144e
DIFF: https://github.com/llvm/llvm-project/commit/6a6ac3b36fcdb44a5096f2ddab952a1281eb144e.diff
LOG: [Hexagon] Support BUILD_VECTOR of floating point HVX vectors
Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh at quicinc.com>
Co-authored-by: Ankit Aggarwal <aankit at quicinc.com>
Added:
llvm/test/CodeGen/Hexagon/autohvx/build-vector-float-type.ll
Modified:
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
llvm/lib/Target/Hexagon/HexagonPatterns.td
llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 88effed9f0767..90dda37a886ab 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2720,7 +2720,6 @@ SDValue
HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
const {
if (Ty.isVector()) {
- assert(Ty.isInteger() && "Only integer vectors are supported here");
unsigned W = Ty.getSizeInBits();
if (W <= 64)
return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index f7237f496aee3..e189b0b49e345 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -55,6 +55,11 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
+ if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
+ addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
+ }
}
// Set up operation actions.
@@ -83,6 +88,21 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
+ Subtarget.useHVXFloatingPoint()) {
+ // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom);
+
+ // BUILD_VECTOR with f16 operands cannot be promoted without
+ // promoting the result, so lower the node to vsplat or constant pool
+ setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
+
+ // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
+ // independent) handling of it would convert it to a load, which is
+ // not always the optimal choice.
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom);
+ }
+
for (MVT T : LegalV) {
setIndexedLoadAction(ISD::POST_INC, T, Legal);
setIndexedStoreAction(ISD::POST_INC, T, Legal);
@@ -497,7 +517,9 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
assert(ElemSize*VecLen == HwLen);
SmallVector<SDValue,32> Words;
- if (VecTy.getVectorElementType() != MVT::i32) {
+ if (VecTy.getVectorElementType() != MVT::i32 &&
+ !(Subtarget.useHVXFloatingPoint() &&
+ VecTy.getVectorElementType() == MVT::f32)) {
assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
@@ -506,22 +528,31 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
Words.push_back(DAG.getBitcast(MVT::i32, W));
}
} else {
- Words.assign(Values.begin(), Values.end());
+ for (SDValue V : Values)
+ Words.push_back(DAG.getBitcast(MVT::i32, V));
}
+ auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
+ unsigned NumValues = Values.size();
+ assert(NumValues > 0);
+ bool IsUndef = true;
+ for (unsigned i = 0; i != NumValues; ++i) {
+ if (Values[i].isUndef())
+ continue;
+ IsUndef = false;
+ if (!SplatV.getNode())
+ SplatV = Values[i];
+ else if (SplatV != Values[i])
+ return false;
+ }
+ if (IsUndef)
+ SplatV = Values[0];
+ return true;
+ };
unsigned NumWords = Words.size();
- bool IsSplat = true, IsUndef = true;
SDValue SplatV;
- for (unsigned i = 0; i != NumWords && IsSplat; ++i) {
- if (isUndef(Words[i]))
- continue;
- IsUndef = false;
- if (!SplatV.getNode())
- SplatV = Words[i];
- else if (SplatV != Words[i])
- IsSplat = false;
- }
- if (IsUndef)
+ bool IsSplat = isSplat(Words, SplatV);
+ if (IsSplat && isUndef(SplatV))
return DAG.getUNDEF(VecTy);
if (IsSplat) {
assert(SplatV.getNode());
@@ -634,8 +665,15 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy,
{HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)});
- SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1});
- return DstV;
+
+ SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
+ SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
+
+ SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
+
+ SDValue OutV =
+ DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
+ return OutV;
}
SDValue
@@ -1237,6 +1275,19 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
if (VecTy.getVectorElementType() == MVT::i1)
return buildHvxVectorPred(Ops, dl, VecTy, DAG);
+ // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
+ // not a legal type, just bitcast the node to use i16
+ // types and bitcast the result back to f16
+ if (VecTy.getVectorElementType() == MVT::f16) {
+ SmallVector<SDValue,64> NewOps;
+ for (unsigned i = 0; i != Size; i++)
+ NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
+
+ SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ tyVector(VecTy, MVT::i16), NewOps);
+ return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
+ }
+
if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
ArrayRef<SDValue> A(Ops);
MVT SingleTy = typeSplit(VecTy).first;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index cad5ca8ab92ec..4ba6d4740e126 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -87,18 +87,6 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
-def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>;
-def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>;
-def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>;
-
-def HVI8: PatLeaf<(VecI8 HvxVR:$R)>;
-def HVI16: PatLeaf<(VecI16 HvxVR:$R)>;
-def HVI32: PatLeaf<(VecI32 HvxVR:$R)>;
-
-def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
-def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
-def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
-
def SDTVecLeaf:
SDTypeProfile<1, 0, [SDTCisVec<0>]>;
def SDTVecVecIntOp:
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index a22a3f8ec0caa..15fa659d26aba 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -6,6 +6,21 @@
//
//===----------------------------------------------------------------------===//
+def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>;
+def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>;
+def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>;
+
+def HVI8: PatLeaf<(VecI8 HvxVR:$R)>;
+def HVI16: PatLeaf<(VecI16 HvxVR:$R)>;
+def HVI32: PatLeaf<(VecI32 HvxVR:$R)>;
+def HVF16: PatLeaf<(VecF16 HvxVR:$R)>;
+def HVF32: PatLeaf<(VecF32 HvxVR:$R)>;
+
+def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
+def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
+def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
+def HWF16: PatLeaf<(VecPF16 HvxWR:$R)>;
+def HWF32: PatLeaf<(VecPF32 HvxWR:$R)>;
def SDTVecUnaryOp:
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
@@ -211,6 +226,24 @@ let Predicates = [UseHVX] in {
defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}
+let Predicates = [UseHVX, UseHVXFloatingPoint] in {
+ defm: NopCast_pat<VecI8, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI8, VecF32, HvxVR>;
+ defm: NopCast_pat<VecI16, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI16, VecF32, HvxVR>;
+ defm: NopCast_pat<VecI32, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI32, VecF32, HvxVR>;
+ defm: NopCast_pat<VecF16, VecF32, HvxVR>;
+
+ defm: NopCast_pat<VecPI8, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI8, VecPF32, HvxWR>;
+ defm: NopCast_pat<VecPI16, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI16, VecPF32, HvxWR>;
+ defm: NopCast_pat<VecPI32, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI32, VecPF32, HvxWR>;
+ defm: NopCast_pat<VecPF16, VecPF32, HvxWR>;
+}
+
let Predicates = [UseHVX] in {
let AddedComplexity = 100 in {
// These should be preferred over a vsplat of 0.
@@ -251,6 +284,13 @@ let Predicates = [UseHVX] in {
(V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}
+let Predicates = [UseHVXFloatingPoint] in {
+ def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+ def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+}
+
// Splats for HvxV60
def V60splatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>;
def V60splatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>;
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 8b7138d3c809d..4c387c8ba638b 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -479,6 +479,10 @@ def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v32i16, v64i16, v32i16]>;
def VecI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v16i32, v32i32, v16i32]>;
+def VecF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v32f16, v64f16, v32f16]>;
+def VecF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v16f32, v32f32, v16f32]>;
def VecPI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v128i8, v256i8, v128i8]>;
@@ -486,6 +490,10 @@ def VecPI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v64i16, v128i16, v64i16]>;
def VecPI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v32i32, v64i32, v32i32]>;
+def VecPF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v64f16, v128f16, v64f16]>;
+def VecPF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v32f32, v64f32, v32f32]>;
def VecQ8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v64i1, v128i1, v64i1]>;
@@ -496,13 +504,13 @@ def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
// HVX register classes
-def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512,
+def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32, VecF16, VecF32], 512,
(add (sequence "V%u", 0, 31), VTMP)> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
[RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
}
-def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024,
+def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32, VecPF16, VecPF32], 1024,
(add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
[RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>;
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/build-vector-float-type.ll b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-float-type.ll
new file mode 100644
index 0000000000000..2eba9e2db446c
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-float-type.ll
@@ -0,0 +1,504 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that this code does compile.
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+; Function Attrs: nounwind
+; CHECK-LABEL: f0:
+; CHECK: vinsert
+define <32 x float> @f0(i32* %a0, float* %a1) #0 {
+b0:
+ %v0 = getelementptr i32, i32* %a0, i32 0
+ %v1 = load i32, i32* %v0, align 4
+ %v2 = getelementptr float, float* %a1, i32 %v1
+ %v3 = load float, float* %v2, align 4
+ %v4 = insertelement <32 x float> undef, float %v3, i32 0
+ %v5 = getelementptr i32, i32* %a0, i32 1
+ %v6 = load i32, i32* %v5, align 4
+ %v7 = getelementptr float, float* %a1, i32 %v6
+ %v8 = load float, float* %v7, align 4
+ %v9 = insertelement <32 x float> %v4, float %v8, i32 1
+ %v10 = getelementptr i32, i32* %a0, i32 2
+ %v11 = load i32, i32* %v10, align 4
+ %v12 = getelementptr float, float* %a1, i32 %v11
+ %v13 = load float, float* %v12, align 4
+ %v14 = insertelement <32 x float> %v9, float %v13, i32 2
+ %v15 = getelementptr i32, i32* %a0, i32 3
+ %v16 = load i32, i32* %v15, align 4
+ %v17 = getelementptr float, float* %a1, i32 %v16
+ %v18 = load float, float* %v17, align 4
+ %v19 = insertelement <32 x float> %v14, float %v18, i32 3
+ %v20 = getelementptr i32, i32* %a0, i32 4
+ %v21 = load i32, i32* %v20, align 4
+ %v22 = getelementptr float, float* %a1, i32 %v21
+ %v23 = load float, float* %v22, align 4
+ %v24 = insertelement <32 x float> %v19, float %v23, i32 4
+ %v25 = getelementptr i32, i32* %a0, i32 5
+ %v26 = load i32, i32* %v25, align 4
+ %v27 = getelementptr float, float* %a1, i32 %v26
+ %v28 = load float, float* %v27, align 4
+ %v29 = insertelement <32 x float> %v24, float %v28, i32 5
+ %v30 = getelementptr i32, i32* %a0, i32 6
+ %v31 = load i32, i32* %v30, align 4
+ %v32 = getelementptr float, float* %a1, i32 %v31
+ %v33 = load float, float* %v32, align 4
+ %v34 = insertelement <32 x float> %v29, float %v33, i32 6
+ %v35 = getelementptr i32, i32* %a0, i32 7
+ %v36 = load i32, i32* %v35, align 4
+ %v37 = getelementptr float, float* %a1, i32 %v36
+ %v38 = load float, float* %v37, align 4
+ %v39 = insertelement <32 x float> %v34, float %v38, i32 7
+ %v40 = getelementptr i32, i32* %a0, i32 8
+ %v41 = load i32, i32* %v40, align 4
+ %v42 = getelementptr float, float* %a1, i32 %v41
+ %v43 = load float, float* %v42, align 4
+ %v44 = insertelement <32 x float> %v39, float %v43, i32 8
+ %v45 = getelementptr i32, i32* %a0, i32 9
+ %v46 = load i32, i32* %v45, align 4
+ %v47 = getelementptr float, float* %a1, i32 %v46
+ %v48 = load float, float* %v47, align 4
+ %v49 = insertelement <32 x float> %v44, float %v48, i32 9
+ %v50 = getelementptr i32, i32* %a0, i32 10
+ %v51 = load i32, i32* %v50, align 4
+ %v52 = getelementptr float, float* %a1, i32 %v51
+ %v53 = load float, float* %v52, align 4
+ %v54 = insertelement <32 x float> %v49, float %v53, i32 10
+ %v55 = getelementptr i32, i32* %a0, i32 11
+ %v56 = load i32, i32* %v55, align 4
+ %v57 = getelementptr float, float* %a1, i32 %v56
+ %v58 = load float, float* %v57, align 4
+ %v59 = insertelement <32 x float> %v54, float %v58, i32 11
+ %v60 = getelementptr i32, i32* %a0, i32 12
+ %v61 = load i32, i32* %v60, align 4
+ %v62 = getelementptr float, float* %a1, i32 %v61
+ %v63 = load float, float* %v62, align 4
+ %v64 = insertelement <32 x float> %v59, float %v63, i32 12
+ %v65 = getelementptr i32, i32* %a0, i32 13
+ %v66 = load i32, i32* %v65, align 4
+ %v67 = getelementptr float, float* %a1, i32 %v66
+ %v68 = load float, float* %v67, align 4
+ %v69 = insertelement <32 x float> %v64, float %v68, i32 13
+ %v70 = getelementptr i32, i32* %a0, i32 14
+ %v71 = load i32, i32* %v70, align 4
+ %v72 = getelementptr float, float* %a1, i32 %v71
+ %v73 = load float, float* %v72, align 4
+ %v74 = insertelement <32 x float> %v69, float %v73, i32 14
+ %v75 = getelementptr i32, i32* %a0, i32 15
+ %v76 = load i32, i32* %v75, align 4
+ %v77 = getelementptr float, float* %a1, i32 %v76
+ %v78 = load float, float* %v77, align 4
+ %v79 = insertelement <32 x float> %v74, float %v78, i32 15
+ %v80 = getelementptr i32, i32* %a0, i32 16
+ %v81 = load i32, i32* %v80, align 4
+ %v82 = getelementptr float, float* %a1, i32 %v81
+ %v83 = load float, float* %v82, align 4
+ %v84 = insertelement <32 x float> %v79, float %v83, i32 16
+ %v85 = getelementptr i32, i32* %a0, i32 17
+ %v86 = load i32, i32* %v85, align 4
+ %v87 = getelementptr float, float* %a1, i32 %v86
+ %v88 = load float, float* %v87, align 4
+ %v89 = insertelement <32 x float> %v84, float %v88, i32 17
+ %v90 = getelementptr i32, i32* %a0, i32 18
+ %v91 = load i32, i32* %v90, align 4
+ %v92 = getelementptr float, float* %a1, i32 %v91
+ %v93 = load float, float* %v92, align 4
+ %v94 = insertelement <32 x float> %v89, float %v93, i32 18
+ %v95 = getelementptr i32, i32* %a0, i32 19
+ %v96 = load i32, i32* %v95, align 4
+ %v97 = getelementptr float, float* %a1, i32 %v96
+ %v98 = load float, float* %v97, align 4
+ %v99 = insertelement <32 x float> %v94, float %v98, i32 19
+ %v100 = getelementptr i32, i32* %a0, i32 20
+ %v101 = load i32, i32* %v100, align 4
+ %v102 = getelementptr float, float* %a1, i32 %v101
+ %v103 = load float, float* %v102, align 4
+ %v104 = insertelement <32 x float> %v99, float %v103, i32 20
+ %v105 = getelementptr i32, i32* %a0, i32 21
+ %v106 = load i32, i32* %v105, align 4
+ %v107 = getelementptr float, float* %a1, i32 %v106
+ %v108 = load float, float* %v107, align 4
+ %v109 = insertelement <32 x float> %v104, float %v108, i32 21
+ %v110 = getelementptr i32, i32* %a0, i32 22
+ %v111 = load i32, i32* %v110, align 4
+ %v112 = getelementptr float, float* %a1, i32 %v111
+ %v113 = load float, float* %v112, align 4
+ %v114 = insertelement <32 x float> %v109, float %v113, i32 22
+ %v115 = getelementptr i32, i32* %a0, i32 23
+ %v116 = load i32, i32* %v115, align 4
+ %v117 = getelementptr float, float* %a1, i32 %v116
+ %v118 = load float, float* %v117, align 4
+ %v119 = insertelement <32 x float> %v114, float %v118, i32 23
+ %v120 = getelementptr i32, i32* %a0, i32 24
+ %v121 = load i32, i32* %v120, align 4
+ %v122 = getelementptr float, float* %a1, i32 %v121
+ %v123 = load float, float* %v122, align 4
+ %v124 = insertelement <32 x float> %v119, float %v123, i32 24
+ %v125 = getelementptr i32, i32* %a0, i32 25
+ %v126 = load i32, i32* %v125, align 4
+ %v127 = getelementptr float, float* %a1, i32 %v126
+ %v128 = load float, float* %v127, align 4
+ %v129 = insertelement <32 x float> %v124, float %v128, i32 25
+ %v130 = getelementptr i32, i32* %a0, i32 26
+ %v131 = load i32, i32* %v130, align 4
+ %v132 = getelementptr float, float* %a1, i32 %v131
+ %v133 = load float, float* %v132, align 4
+ %v134 = insertelement <32 x float> %v129, float %v133, i32 26
+ %v135 = getelementptr i32, i32* %a0, i32 27
+ %v136 = load i32, i32* %v135, align 4
+ %v137 = getelementptr float, float* %a1, i32 %v136
+ %v138 = load float, float* %v137, align 4
+ %v139 = insertelement <32 x float> %v134, float %v138, i32 27
+ %v140 = getelementptr i32, i32* %a0, i32 28
+ %v141 = load i32, i32* %v140, align 4
+ %v142 = getelementptr float, float* %a1, i32 %v141
+ %v143 = load float, float* %v142, align 4
+ %v144 = insertelement <32 x float> %v139, float %v143, i32 28
+ %v145 = getelementptr i32, i32* %a0, i32 29
+ %v146 = load i32, i32* %v145, align 4
+ %v147 = getelementptr float, float* %a1, i32 %v146
+ %v148 = load float, float* %v147, align 4
+ %v149 = insertelement <32 x float> %v144, float %v148, i32 29
+ %v150 = getelementptr i32, i32* %a0, i32 30
+ %v151 = load i32, i32* %v150, align 4
+ %v152 = getelementptr float, float* %a1, i32 %v151
+ %v153 = load float, float* %v152, align 4
+ %v154 = insertelement <32 x float> %v149, float %v153, i32 30
+ %v155 = getelementptr i32, i32* %a0, i32 31
+ %v156 = load i32, i32* %v155, align 4
+ %v157 = getelementptr float, float* %a1, i32 %v156
+ %v158 = load float, float* %v157, align 4
+ %v159 = insertelement <32 x float> %v154, float %v158, i32 31
+ ret <32 x float> %v159
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: f1:
+; CHECK: vinsert
+define <64 x half> @f1(i32* %a0, half* %a1) #0 {
+b0:
+ %v0 = getelementptr i32, i32* %a0, i32 0
+ %v1 = load i32, i32* %v0, align 4
+ %v2 = getelementptr half, half* %a1, i32 %v1
+ %v3 = load half, half* %v2, align 4
+ %v4 = insertelement <64 x half> undef, half %v3, i32 0
+ %v5 = getelementptr i32, i32* %a0, i32 1
+ %v6 = load i32, i32* %v5, align 4
+ %v7 = getelementptr half, half* %a1, i32 %v6
+ %v8 = load half, half* %v7, align 4
+ %v9 = insertelement <64 x half> %v4, half %v8, i32 1
+ %v10 = getelementptr i32, i32* %a0, i32 2
+ %v11 = load i32, i32* %v10, align 4
+ %v12 = getelementptr half, half* %a1, i32 %v11
+ %v13 = load half, half* %v12, align 4
+ %v14 = insertelement <64 x half> %v9, half %v13, i32 2
+ %v15 = getelementptr i32, i32* %a0, i32 3
+ %v16 = load i32, i32* %v15, align 4
+ %v17 = getelementptr half, half* %a1, i32 %v16
+ %v18 = load half, half* %v17, align 4
+ %v19 = insertelement <64 x half> %v14, half %v18, i32 3
+ %v20 = getelementptr i32, i32* %a0, i32 4
+ %v21 = load i32, i32* %v20, align 4
+ %v22 = getelementptr half, half* %a1, i32 %v21
+ %v23 = load half, half* %v22, align 4
+ %v24 = insertelement <64 x half> %v19, half %v23, i32 4
+ %v25 = getelementptr i32, i32* %a0, i32 5
+ %v26 = load i32, i32* %v25, align 4
+ %v27 = getelementptr half, half* %a1, i32 %v26
+ %v28 = load half, half* %v27, align 4
+ %v29 = insertelement <64 x half> %v24, half %v28, i32 5
+ %v30 = getelementptr i32, i32* %a0, i32 6
+ %v31 = load i32, i32* %v30, align 4
+ %v32 = getelementptr half, half* %a1, i32 %v31
+ %v33 = load half, half* %v32, align 4
+ %v34 = insertelement <64 x half> %v29, half %v33, i32 6
+ %v35 = getelementptr i32, i32* %a0, i32 7
+ %v36 = load i32, i32* %v35, align 4
+ %v37 = getelementptr half, half* %a1, i32 %v36
+ %v38 = load half, half* %v37, align 4
+ %v39 = insertelement <64 x half> %v34, half %v38, i32 7
+ %v40 = getelementptr i32, i32* %a0, i32 8
+ %v41 = load i32, i32* %v40, align 4
+ %v42 = getelementptr half, half* %a1, i32 %v41
+ %v43 = load half, half* %v42, align 4
+ %v44 = insertelement <64 x half> %v39, half %v43, i32 8
+ %v45 = getelementptr i32, i32* %a0, i32 9
+ %v46 = load i32, i32* %v45, align 4
+ %v47 = getelementptr half, half* %a1, i32 %v46
+ %v48 = load half, half* %v47, align 4
+ %v49 = insertelement <64 x half> %v44, half %v48, i32 9
+ %v50 = getelementptr i32, i32* %a0, i32 10
+ %v51 = load i32, i32* %v50, align 4
+ %v52 = getelementptr half, half* %a1, i32 %v51
+ %v53 = load half, half* %v52, align 4
+ %v54 = insertelement <64 x half> %v49, half %v53, i32 10
+ %v55 = getelementptr i32, i32* %a0, i32 11
+ %v56 = load i32, i32* %v55, align 4
+ %v57 = getelementptr half, half* %a1, i32 %v56
+ %v58 = load half, half* %v57, align 4
+ %v59 = insertelement <64 x half> %v54, half %v58, i32 11
+ %v60 = getelementptr i32, i32* %a0, i32 12
+ %v61 = load i32, i32* %v60, align 4
+ %v62 = getelementptr half, half* %a1, i32 %v61
+ %v63 = load half, half* %v62, align 4
+ %v64 = insertelement <64 x half> %v59, half %v63, i32 12
+ %v65 = getelementptr i32, i32* %a0, i32 13
+ %v66 = load i32, i32* %v65, align 4
+ %v67 = getelementptr half, half* %a1, i32 %v66
+ %v68 = load half, half* %v67, align 4
+ %v69 = insertelement <64 x half> %v64, half %v68, i32 13
+ %v70 = getelementptr i32, i32* %a0, i32 14
+ %v71 = load i32, i32* %v70, align 4
+ %v72 = getelementptr half, half* %a1, i32 %v71
+ %v73 = load half, half* %v72, align 4
+ %v74 = insertelement <64 x half> %v69, half %v73, i32 14
+ %v75 = getelementptr i32, i32* %a0, i32 15
+ %v76 = load i32, i32* %v75, align 4
+ %v77 = getelementptr half, half* %a1, i32 %v76
+ %v78 = load half, half* %v77, align 4
+ %v79 = insertelement <64 x half> %v74, half %v78, i32 15
+ %v80 = getelementptr i32, i32* %a0, i32 16
+ %v81 = load i32, i32* %v80, align 4
+ %v82 = getelementptr half, half* %a1, i32 %v81
+ %v83 = load half, half* %v82, align 4
+ %v84 = insertelement <64 x half> %v79, half %v83, i32 16
+ %v85 = getelementptr i32, i32* %a0, i32 17
+ %v86 = load i32, i32* %v85, align 4
+ %v87 = getelementptr half, half* %a1, i32 %v86
+ %v88 = load half, half* %v87, align 4
+ %v89 = insertelement <64 x half> %v84, half %v88, i32 17
+ %v90 = getelementptr i32, i32* %a0, i32 18
+ %v91 = load i32, i32* %v90, align 4
+ %v92 = getelementptr half, half* %a1, i32 %v91
+ %v93 = load half, half* %v92, align 4
+ %v94 = insertelement <64 x half> %v89, half %v93, i32 18
+ %v95 = getelementptr i32, i32* %a0, i32 19
+ %v96 = load i32, i32* %v95, align 4
+ %v97 = getelementptr half, half* %a1, i32 %v96
+ %v98 = load half, half* %v97, align 4
+ %v99 = insertelement <64 x half> %v94, half %v98, i32 19
+ %v100 = getelementptr i32, i32* %a0, i32 20
+ %v101 = load i32, i32* %v100, align 4
+ %v102 = getelementptr half, half* %a1, i32 %v101
+ %v103 = load half, half* %v102, align 4
+ %v104 = insertelement <64 x half> %v99, half %v103, i32 20
+ %v105 = getelementptr i32, i32* %a0, i32 21
+ %v106 = load i32, i32* %v105, align 4
+ %v107 = getelementptr half, half* %a1, i32 %v106
+ %v108 = load half, half* %v107, align 4
+ %v109 = insertelement <64 x half> %v104, half %v108, i32 21
+ %v110 = getelementptr i32, i32* %a0, i32 22
+ %v111 = load i32, i32* %v110, align 4
+ %v112 = getelementptr half, half* %a1, i32 %v111
+ %v113 = load half, half* %v112, align 4
+ %v114 = insertelement <64 x half> %v109, half %v113, i32 22
+ %v115 = getelementptr i32, i32* %a0, i32 23
+ %v116 = load i32, i32* %v115, align 4
+ %v117 = getelementptr half, half* %a1, i32 %v116
+ %v118 = load half, half* %v117, align 4
+ %v119 = insertelement <64 x half> %v114, half %v118, i32 23
+ %v120 = getelementptr i32, i32* %a0, i32 24
+ %v121 = load i32, i32* %v120, align 4
+ %v122 = getelementptr half, half* %a1, i32 %v121
+ %v123 = load half, half* %v122, align 4
+ %v124 = insertelement <64 x half> %v119, half %v123, i32 24
+ %v125 = getelementptr i32, i32* %a0, i32 25
+ %v126 = load i32, i32* %v125, align 4
+ %v127 = getelementptr half, half* %a1, i32 %v126
+ %v128 = load half, half* %v127, align 4
+ %v129 = insertelement <64 x half> %v124, half %v128, i32 25
+ %v130 = getelementptr i32, i32* %a0, i32 26
+ %v131 = load i32, i32* %v130, align 4
+ %v132 = getelementptr half, half* %a1, i32 %v131
+ %v133 = load half, half* %v132, align 4
+ %v134 = insertelement <64 x half> %v129, half %v133, i32 26
+ %v135 = getelementptr i32, i32* %a0, i32 27
+ %v136 = load i32, i32* %v135, align 4
+ %v137 = getelementptr half, half* %a1, i32 %v136
+ %v138 = load half, half* %v137, align 4
+ %v139 = insertelement <64 x half> %v134, half %v138, i32 27
+ %v140 = getelementptr i32, i32* %a0, i32 28
+ %v141 = load i32, i32* %v140, align 4
+ %v142 = getelementptr half, half* %a1, i32 %v141
+ %v143 = load half, half* %v142, align 4
+ %v144 = insertelement <64 x half> %v139, half %v143, i32 28
+ %v145 = getelementptr i32, i32* %a0, i32 29
+ %v146 = load i32, i32* %v145, align 4
+ %v147 = getelementptr half, half* %a1, i32 %v146
+ %v148 = load half, half* %v147, align 4
+ %v149 = insertelement <64 x half> %v144, half %v148, i32 29
+ %v150 = getelementptr i32, i32* %a0, i32 30
+ %v151 = load i32, i32* %v150, align 4
+ %v152 = getelementptr half, half* %a1, i32 %v151
+ %v153 = load half, half* %v152, align 4
+ %v154 = insertelement <64 x half> %v149, half %v153, i32 30
+ %v155 = getelementptr i32, i32* %a0, i32 31
+ %v156 = load i32, i32* %v155, align 4
+ %v157 = getelementptr half, half* %a1, i32 %v156
+ %v158 = load half, half* %v157, align 4
+ %v159 = insertelement <64 x half> %v154, half %v158, i32 31
+ %v160 = getelementptr i32, i32* %a0, i32 32
+ %v161 = load i32, i32* %v160, align 4
+ %v162 = getelementptr half, half* %a1, i32 %v161
+ %v163 = load half, half* %v162, align 4
+ %v164 = insertelement <64 x half> %v159, half %v163, i32 32
+ %v165 = getelementptr i32, i32* %a0, i32 33
+ %v166 = load i32, i32* %v165, align 4
+ %v167 = getelementptr half, half* %a1, i32 %v166
+ %v168 = load half, half* %v167, align 4
+ %v169 = insertelement <64 x half> %v164, half %v168, i32 33
+ %v170 = getelementptr i32, i32* %a0, i32 34
+ %v171 = load i32, i32* %v170, align 4
+ %v172 = getelementptr half, half* %a1, i32 %v171
+ %v173 = load half, half* %v172, align 4
+ %v174 = insertelement <64 x half> %v169, half %v173, i32 34
+ %v175 = getelementptr i32, i32* %a0, i32 35
+ %v176 = load i32, i32* %v175, align 4
+ %v177 = getelementptr half, half* %a1, i32 %v176
+ %v178 = load half, half* %v177, align 4
+ %v179 = insertelement <64 x half> %v174, half %v178, i32 35
+ %v180 = getelementptr i32, i32* %a0, i32 36
+ %v181 = load i32, i32* %v180, align 4
+ %v182 = getelementptr half, half* %a1, i32 %v181
+ %v183 = load half, half* %v182, align 4
+ %v184 = insertelement <64 x half> %v179, half %v183, i32 36
+ %v185 = getelementptr i32, i32* %a0, i32 37
+ %v186 = load i32, i32* %v185, align 4
+ %v187 = getelementptr half, half* %a1, i32 %v186
+ %v188 = load half, half* %v187, align 4
+ %v189 = insertelement <64 x half> %v184, half %v188, i32 37
+ %v190 = getelementptr i32, i32* %a0, i32 38
+ %v191 = load i32, i32* %v190, align 4
+ %v192 = getelementptr half, half* %a1, i32 %v191
+ %v193 = load half, half* %v192, align 4
+ %v194 = insertelement <64 x half> %v189, half %v193, i32 38
+ %v195 = getelementptr i32, i32* %a0, i32 39
+ %v196 = load i32, i32* %v195, align 4
+ %v197 = getelementptr half, half* %a1, i32 %v196
+ %v198 = load half, half* %v197, align 4
+ %v199 = insertelement <64 x half> %v194, half %v198, i32 39
+ %v200 = getelementptr i32, i32* %a0, i32 40
+ %v201 = load i32, i32* %v200, align 4
+ %v202 = getelementptr half, half* %a1, i32 %v201
+ %v203 = load half, half* %v202, align 4
+ %v204 = insertelement <64 x half> %v199, half %v203, i32 40
+ %v205 = getelementptr i32, i32* %a0, i32 41
+ %v206 = load i32, i32* %v205, align 4
+ %v207 = getelementptr half, half* %a1, i32 %v206
+ %v208 = load half, half* %v207, align 4
+ %v209 = insertelement <64 x half> %v204, half %v208, i32 41
+ %v210 = getelementptr i32, i32* %a0, i32 42
+ %v211 = load i32, i32* %v210, align 4
+ %v212 = getelementptr half, half* %a1, i32 %v211
+ %v213 = load half, half* %v212, align 4
+ %v214 = insertelement <64 x half> %v209, half %v213, i32 42
+ %v215 = getelementptr i32, i32* %a0, i32 43
+ %v216 = load i32, i32* %v215, align 4
+ %v217 = getelementptr half, half* %a1, i32 %v216
+ %v218 = load half, half* %v217, align 4
+ %v219 = insertelement <64 x half> %v214, half %v218, i32 43
+ %v220 = getelementptr i32, i32* %a0, i32 44
+ %v221 = load i32, i32* %v220, align 4
+ %v222 = getelementptr half, half* %a1, i32 %v221
+ %v223 = load half, half* %v222, align 4
+ %v224 = insertelement <64 x half> %v219, half %v223, i32 44
+ %v225 = getelementptr i32, i32* %a0, i32 45
+ %v226 = load i32, i32* %v225, align 4
+ %v227 = getelementptr half, half* %a1, i32 %v226
+ %v228 = load half, half* %v227, align 4
+ %v229 = insertelement <64 x half> %v224, half %v228, i32 45
+ %v230 = getelementptr i32, i32* %a0, i32 46
+ %v231 = load i32, i32* %v230, align 4
+ %v232 = getelementptr half, half* %a1, i32 %v231
+ %v233 = load half, half* %v232, align 4
+ %v234 = insertelement <64 x half> %v229, half %v233, i32 46
+ %v235 = getelementptr i32, i32* %a0, i32 47
+ %v236 = load i32, i32* %v235, align 4
+ %v237 = getelementptr half, half* %a1, i32 %v236
+ %v238 = load half, half* %v237, align 4
+ %v239 = insertelement <64 x half> %v234, half %v238, i32 47
+ %v240 = getelementptr i32, i32* %a0, i32 48
+ %v241 = load i32, i32* %v240, align 4
+ %v242 = getelementptr half, half* %a1, i32 %v241
+ %v243 = load half, half* %v242, align 4
+ %v244 = insertelement <64 x half> %v239, half %v243, i32 48
+ %v245 = getelementptr i32, i32* %a0, i32 49
+ %v246 = load i32, i32* %v245, align 4
+ %v247 = getelementptr half, half* %a1, i32 %v246
+ %v248 = load half, half* %v247, align 4
+ %v249 = insertelement <64 x half> %v244, half %v248, i32 49
+ %v250 = getelementptr i32, i32* %a0, i32 50
+ %v251 = load i32, i32* %v250, align 4
+ %v252 = getelementptr half, half* %a1, i32 %v251
+ %v253 = load half, half* %v252, align 4
+ %v254 = insertelement <64 x half> %v249, half %v253, i32 50
+ %v255 = getelementptr i32, i32* %a0, i32 51
+ %v256 = load i32, i32* %v255, align 4
+ %v257 = getelementptr half, half* %a1, i32 %v256
+ %v258 = load half, half* %v257, align 4
+ %v259 = insertelement <64 x half> %v254, half %v258, i32 51
+ %v260 = getelementptr i32, i32* %a0, i32 52
+ %v261 = load i32, i32* %v260, align 4
+ %v262 = getelementptr half, half* %a1, i32 %v261
+ %v263 = load half, half* %v262, align 4
+ %v264 = insertelement <64 x half> %v259, half %v263, i32 52
+ %v265 = getelementptr i32, i32* %a0, i32 53
+ %v266 = load i32, i32* %v265, align 4
+ %v267 = getelementptr half, half* %a1, i32 %v266
+ %v268 = load half, half* %v267, align 4
+ %v269 = insertelement <64 x half> %v264, half %v268, i32 53
+ %v270 = getelementptr i32, i32* %a0, i32 54
+ %v271 = load i32, i32* %v270, align 4
+ %v272 = getelementptr half, half* %a1, i32 %v271
+ %v273 = load half, half* %v272, align 4
+ %v274 = insertelement <64 x half> %v269, half %v273, i32 54
+ %v275 = getelementptr i32, i32* %a0, i32 55
+ %v276 = load i32, i32* %v275, align 4
+ %v277 = getelementptr half, half* %a1, i32 %v276
+ %v278 = load half, half* %v277, align 4
+ %v279 = insertelement <64 x half> %v274, half %v278, i32 55
+ %v280 = getelementptr i32, i32* %a0, i32 56
+ %v281 = load i32, i32* %v280, align 4
+ %v282 = getelementptr half, half* %a1, i32 %v281
+ %v283 = load half, half* %v282, align 4
+ %v284 = insertelement <64 x half> %v279, half %v283, i32 56
+ %v285 = getelementptr i32, i32* %a0, i32 57
+ %v286 = load i32, i32* %v285, align 4
+ %v287 = getelementptr half, half* %a1, i32 %v286
+ %v288 = load half, half* %v287, align 4
+ %v289 = insertelement <64 x half> %v284, half %v288, i32 57
+ %v290 = getelementptr i32, i32* %a0, i32 58
+ %v291 = load i32, i32* %v290, align 4
+ %v292 = getelementptr half, half* %a1, i32 %v291
+ %v293 = load half, half* %v292, align 4
+ %v294 = insertelement <64 x half> %v289, half %v293, i32 58
+ %v295 = getelementptr i32, i32* %a0, i32 59
+ %v296 = load i32, i32* %v295, align 4
+ %v297 = getelementptr half, half* %a1, i32 %v296
+ %v298 = load half, half* %v297, align 4
+ %v299 = insertelement <64 x half> %v294, half %v298, i32 59
+ %v300 = getelementptr i32, i32* %a0, i32 60
+ %v301 = load i32, i32* %v300, align 4
+ %v302 = getelementptr half, half* %a1, i32 %v301
+ %v303 = load half, half* %v302, align 4
+ %v304 = insertelement <64 x half> %v299, half %v303, i32 60
+ %v305 = getelementptr i32, i32* %a0, i32 61
+ %v306 = load i32, i32* %v305, align 4
+ %v307 = getelementptr half, half* %a1, i32 %v306
+ %v308 = load half, half* %v307, align 4
+ %v309 = insertelement <64 x half> %v304, half %v308, i32 61
+ %v310 = getelementptr i32, i32* %a0, i32 62
+ %v311 = load i32, i32* %v310, align 4
+ %v312 = getelementptr half, half* %a1, i32 %v311
+ %v313 = load half, half* %v312, align 4
+ %v314 = insertelement <64 x half> %v309, half %v313, i32 62
+ %v315 = getelementptr i32, i32* %a0, i32 63
+ %v316 = load i32, i32* %v315, align 4
+ %v317 = getelementptr half, half* %a1, i32 %v316
+ %v318 = load half, half* %v317, align 4
+ %v319 = insertelement <64 x half> %v314, half %v318, i32 63
+ ret <64 x half> %v319
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }
More information about the llvm-commits
mailing list