[llvm] bb877d1 - [Hexagon] Widen loads and handle any-/sign-/zero-extensions
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 14 16:11:37 PDT 2020
Author: Krzysztof Parzyszek
Date: 2020-09-14T18:10:23-05:00
New Revision: bb877d1af2ec2f624caa380350c8da00c984e754
URL: https://github.com/llvm/llvm-project/commit/bb877d1af2ec2f624caa380350c8da00c984e754
DIFF: https://github.com/llvm/llvm-project/commit/bb877d1af2ec2f624caa380350c8da00c984e754.diff
LOG: [Hexagon] Widen loads and handle any-/sign-/zero-extensions
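The motivating case is a load-extend-store sequence on a vector type
shorter than a full HVX register. A minimal example of the pattern,
adapted from the added widen-ext.ll test (compiled with
-hexagon-hvx-widen=32):

; The <32 x i8> input occupies 32 bytes of a 128-byte HVX register.
; With widening, the sext lowers to a single vunpack, and the 64-byte
; result is stored with a vsetq-predicated vmem.
define void @widen_sext(<32 x i8>* %a0, <32 x i16>* %a1) {
  %v0 = load <32 x i8>, <32 x i8>* %a0, align 128
  %v1 = sext <32 x i8> %v0 to <32 x i16>
  store <32 x i16> %v1, <32 x i16>* %a1, align 128
  ret void
}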
Added:
llvm/test/CodeGen/Hexagon/autohvx/widen-ext.ll
Modified:
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
llvm/lib/Target/Hexagon/HexagonISelLowering.h
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 645d28de2b20..20e5e5a91b12 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1863,6 +1863,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
case HexagonISD::VPACKL: return "HexagonISD::VPACKL";
+ case HexagonISD::VUNPACK: return "HexagonISD::VUNPACK";
+ case HexagonISD::VUNPACKU: return "HexagonISD::VUNPACKU";
case HexagonISD::OP_END: break;
}
return nullptr;
@@ -2650,6 +2652,28 @@ HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
llvm_unreachable("Invalid type for zero");
}
+SDValue
+HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
+ const {
+ MVT ValTy = ty(Val);
+ assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
+
+ unsigned ValLen = ValTy.getVectorNumElements();
+ unsigned ResLen = ResTy.getVectorNumElements();
+ if (ValLen == ResLen)
+ return Val;
+
+ const SDLoc &dl(Val);
+ assert(ValLen < ResLen);
+ assert(ResLen % ValLen == 0);
+
+ SmallVector<SDValue, 4> Concats = {Val};
+ for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
+ Concats.push_back(DAG.getUNDEF(ValTy));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
+}
+
SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
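The appendUndef helper above widens a short vector to ResTy by
concatenating undef vectors of the input type, i.e. it emits a single
ISD::CONCAT_VECTORS node. A rough LLVM IR analogy (a sketch only; the
helper operates on SelectionDAG nodes, and pad_undef is a hypothetical
name):

; Pad <4 x i8> to <8 x i8>; lanes 4..7 are undef, mirroring the undef
; vectors that appendUndef concatenates at the DAG level.
define <8 x i8> @pad_undef(<4 x i8> %v) {
  %w = shufflevector <4 x i8> %v, <4 x i8> undef,
       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                  i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i8> %w
}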
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index a396ff8ef7ec..cc34a4cd0396 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -94,6 +94,8 @@ enum NodeType : unsigned {
// the low halfwords and pack them into the first 32
// halfwords of the output. The rest of the output is
// unspecified.
+ VUNPACK, // Unpacking into low elements with sign extension.
+ VUNPACKU, // Unpacking into low elements with zero extension.
OP_END
};
@@ -367,6 +369,7 @@ class HexagonTargetLowering : public TargetLowering {
SDValue contractPredicate(SDValue Vec64, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG) const;
bool isUndef(SDValue Op) const {
if (Op.isMachineOpcode())
@@ -481,7 +484,9 @@ class HexagonTargetLowering : public TargetLowering {
SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const;
SDValue WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const;
std::pair<const TargetRegisterClass*, uint8_t>
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 51804e5f5327..a61d79ab3364 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -234,8 +234,12 @@ HexagonTargetLowering::initializeHVXLowering() {
MVT VecTy = MVT::getVectorVT(ElemTy, N);
auto Action = getPreferredVectorAction(VecTy);
if (Action == TargetLoweringBase::TypeWidenVector) {
- setOperationAction(ISD::STORE, VecTy, Custom);
- setOperationAction(ISD::TRUNCATE, VecTy, Custom);
+ setOperationAction(ISD::LOAD, VecTy, Custom);
+ setOperationAction(ISD::STORE, VecTy, Custom);
+ setOperationAction(ISD::TRUNCATE, VecTy, Custom);
+ setOperationAction(ISD::ANY_EXTEND, VecTy, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom);
}
}
}
@@ -1886,6 +1890,38 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable(Name.c_str());
}
+SDValue
+HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ auto *LoadN = cast<LoadSDNode>(Op.getNode());
+ assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
+ assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
+ "Not widening loads of i1 yet");
+
+ SDValue Chain = LoadN->getChain();
+ SDValue Base = LoadN->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(MVT::i32);
+
+ MVT ResTy = ty(Op);
+ unsigned HwLen = Subtarget.getVectorLength();
+ unsigned ResLen = ResTy.getStoreSize();
+ assert(ResLen < HwLen && "vsetq(v1) prerequisite");
+
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+ SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
+ {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
+
+ MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
+
+ SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
+ DAG.getUNDEF(LoadTy), LoadTy, MemOp,
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
+ SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
+ return DAG.getMergeValues({Value, Chain}, dl);
+}
+
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
const SDLoc &dl(Op);
@@ -1912,12 +1948,45 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
- SDValue StoreQ = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
- {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
+ SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
+ {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
MachineFunction &MF = DAG.getMachineFunction();
- auto *MOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
- return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, StoreQ, ty(Value),
- MOp, ISD::UNINDEXED, false, false);
+ auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
+ return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
+ MemOp, ISD::UNINDEXED, false, false);
+}
+
+SDValue
+HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ unsigned HwWidth = 8*Subtarget.getVectorLength();
+
+ SDValue Op0 = Op.getOperand(0);
+ MVT ResTy = ty(Op);
+ MVT OpTy = ty(Op0);
+ if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
+ return SDValue();
+
+ // .-res, op->  ScalarVec      Illegal  HVX
+ // Scalar       ok             -        -
+ // Illegal      widen(insert)  widen    -
+ // HVX          -              widen    ok
+
+ auto getFactor = [HwWidth](MVT Ty) {
+ unsigned Width = Ty.getSizeInBits();
+ return HwWidth > Width ? HwWidth / Width : 1;
+ };
+
+ auto getWideTy = [getFactor](MVT Ty) {
+ unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
+ return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
+ };
+
+ unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK
+ : HexagonISD::VUNPACKU;
+ SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
+ SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
+ return WideRes;
}
SDValue
@@ -1931,10 +2000,10 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
return SDValue();
- // .-res, op->  Scalar     Illegal         HVX
- // Scalar       ok         extract(widen)  -
- // Illegal      -          widen           widen
- // HVX          -          -               ok
+ // .-res, op->  ScalarVec  Illegal         HVX
+ // Scalar       ok         extract(widen)  -
+ // Illegal      -          widen           widen
+ // HVX          -          -               ok
auto getFactor = [HwWidth](MVT Ty) {
unsigned Width = Ty.getSizeInBits();
@@ -1952,17 +2021,13 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");
- MVT WideOpTy = getWideTy(OpTy);
- SmallVector<SDValue, 4> Concats = {Op0};
- for (int i = 0, e = getFactor(OpTy) - 1; i != e; ++i)
- Concats.push_back(DAG.getUNDEF(OpTy));
-
- SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideOpTy, Concats);
- SDValue V = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat);
+ SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
+ SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
+ WideOp);
// If the original result wasn't legal and was supposed to be widened,
// we're done.
if (shouldWidenToHvx(ResTy, DAG))
- return V;
+ return WideRes;
// The original result type wasn't meant to be widened to HVX, so
// leave it as it is. Standard legalization should be able to deal
@@ -1970,7 +2035,7 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
// node).
assert(ResTy.isVector());
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
- {V, getZero(dl, MVT::i32, DAG)});
+ {WideRes, getZero(dl, MVT::i32, DAG)});
}
SDValue
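For comparison, WidenHvxTruncate (refactored above to share
appendUndef) widens the inverse direction through HexagonISD::VPACKL.
A sketch of the kind of input it handles (assumed behavior, not part
of the added test):

define void @widen_trunc(<32 x i16>* %a0, <32 x i8>* %a1) {
  %v0 = load <32 x i16>, <32 x i16>* %a0, align 128
  %v1 = trunc <32 x i16> %v0 to <32 x i8>
  store <32 x i8> %v1, <32 x i8>* %a1, align 128
  ret void
}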
@@ -2053,12 +2118,18 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
SDValue Op(N, 0);
switch (Opc) {
- case ISD::TRUNCATE: {
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?");
+ if (SDValue T = WidenHvxExtend(Op, DAG))
+ Results.push_back(T);
+ break;
+ case ISD::TRUNCATE:
assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?");
if (SDValue T = WidenHvxTruncate(Op, DAG))
Results.push_back(T);
break;
- }
case ISD::STORE: {
assert(shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG) &&
"Not widening?");
@@ -2089,11 +2160,25 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
unsigned Opc = N->getOpcode();
SDValue Op(N, 0);
switch (Opc) {
- case ISD::TRUNCATE: {
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
+ if (SDValue T = WidenHvxExtend(Op, DAG))
+ Results.push_back(T);
+ break;
+ case ISD::TRUNCATE:
assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
if (SDValue T = WidenHvxTruncate(Op, DAG))
Results.push_back(T);
break;
+ case ISD::LOAD: {
+ assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
+ SDValue Load = WidenHvxLoad(Op, DAG);
+ assert(Load->getOpcode() == ISD::MERGE_VALUES);
+ Results.push_back(Load.getOperand(0));
+ Results.push_back(Load.getOperand(1));
+ break;
}
case ISD::BITCAST:
if (isHvxBoolTy(ty(N->getOperand(0)))) {
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 630fd7a17040..64e24f246626 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -41,6 +41,8 @@ def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>;
def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>;
def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>;
+def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>;
+def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;
def vzero: PatFrag<(ops), (HexagonVZERO)>;
def qtrue: PatFrag<(ops), (HexagonQTRUE)>;
@@ -48,8 +50,10 @@ def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
def qcat: PatFrag<(ops node:$Qs, node:$Qt),
(HexagonQCAT node:$Qs, node:$Qt)>;
-def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
-def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
+def qnot:     PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
+def vpackl:   PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
+def vunpack:  PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>;
+def vunpacku: PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>;
def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>;
def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
@@ -416,6 +420,20 @@ let Predicates = [UseHVX] in {
def: Pat<(VecI8 (vpackl HWI32:$Vs)), (V6_vdealb4w (HiVec $Vs), (LoVec $Vs))>;
def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;
+ def: Pat<(VecI16 (vunpack HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
+ def: Pat<(VecI32 (vunpack HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
+ def: Pat<(VecI32 (vunpack HVI16:$Vs)), (LoVec (VSxth $Vs))>;
+ def: Pat<(VecPI16 (vunpack HVI8:$Vs)), (VSxtb $Vs)>;
+ def: Pat<(VecPI32 (vunpack HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>;
+ def: Pat<(VecPI32 (vunpack HVI16:$Vs)), (VSxth $Vs)>;
+
+ def: Pat<(VecI16 (vunpacku HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
+ def: Pat<(VecI32 (vunpacku HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
+ def: Pat<(VecI32 (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>;
+ def: Pat<(VecPI16 (vunpacku HVI8:$Vs)), (VZxtb $Vs)>;
+ def: Pat<(VecPI32 (vunpacku HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>;
+ def: Pat<(VecPI32 (vunpacku HVI16:$Vs)), (VZxth $Vs)>;
+
def: Pat<(VecI16 (bswap HVI16:$Vs)),
(V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>;
def: Pat<(VecI32 (bswap HVI32:$Vs)),
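The test below only exercises sign extension (the vunpack patterns).
A zero-extension analogue would go through HexagonISD::VUNPACKU and
select the VZxtb/VZxth output patterns instead; a hypothetical example
along the lines of f2:

define void @widen_zext(<64 x i8>* %a0, <64 x i16>* %a1) {
  %v0 = load <64 x i8>, <64 x i8>* %a0, align 128
  %v1 = zext <64 x i8> %v0 to <64 x i16>
  store <64 x i16> %v1, <64 x i16>* %a1, align 128
  ret void
}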
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/widen-ext.ll b/llvm/test/CodeGen/Hexagon/autohvx/widen-ext.ll
new file mode 100644
index 000000000000..eb4f11522082
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/widen-ext.ll
@@ -0,0 +1,99 @@
+; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s
+
+; v32i8 -> v32i16
+; CHECK-LABEL: f0:
+; CHECK: r[[R0:[0-9]+]] = #64
+; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
+; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
+; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
+; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
+define void @f0(<32 x i8>* %a0, <32 x i16>* %a1) #0 {
+ %v0 = load <32 x i8>, <32 x i8>* %a0, align 128
+ %v1 = sext <32 x i8> %v0 to <32 x i16>
+ store <32 x i16> %v1, <32 x i16>* %a1, align 128
+ ret void
+}
+
+; v32i8 -> v32i32
+; CHECK-LABEL: f1:
+; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
+; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
+; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
+; CHECK: vmem(r1+#0) = v[[V4]]
+define void @f1(<32 x i8>* %a0, <32 x i32>* %a1) #0 {
+ %v0 = load <32 x i8>, <32 x i8>* %a0, align 128
+ %v1 = sext <32 x i8> %v0 to <32 x i32>
+ store <32 x i32> %v1, <32 x i32>* %a1, align 128
+ ret void
+}
+
+; v64i8 -> v64i16
+; CHECK-LABEL: f2:
+; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
+; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
+; CHECK: vmem(r1+#0) = v[[V2]]
+define void @f2(<64 x i8>* %a0, <64 x i16>* %a1) #0 {
+ %v0 = load <64 x i8>, <64 x i8>* %a0, align 128
+ %v1 = sext <64 x i8> %v0 to <64 x i16>
+ store <64 x i16> %v1, <64 x i16>* %a1, align 128
+ ret void
+}
+
+; v64i8 -> v64i32
+; CHECK-LABEL: f3:
+; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
+; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
+; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
+; CHECK-DAG: vmem(r1+#0) = v[[V4]]
+; CHECK-DAG: vmem(r1+#1) = v[[V3]]
+define void @f3(<64 x i8>* %a0, <64 x i32>* %a1) #0 {
+ %v0 = load <64 x i8>, <64 x i8>* %a0, align 128
+ %v1 = sext <64 x i8> %v0 to <64 x i32>
+ store <64 x i32> %v1, <64 x i32>* %a1, align 128
+ ret void
+}
+
+; v16i16 -> v16i32
+; CHECK-LABEL: f4:
+; CHECK: r[[R0:[0-9]+]] = #64
+; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
+; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].w = vunpack(v[[V0]].h)
+; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
+; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
+define void @f4(<16 x i16>* %a0, <16 x i32>* %a1) #0 {
+ %v0 = load <16 x i16>, <16 x i16>* %a0, align 128
+ %v1 = sext <16 x i16> %v0 to <16 x i32>
+ store <16 x i32> %v1, <16 x i32>* %a1, align 128
+ ret void
+}
+
+; v32i16 -> v32i32
+; CHECK-LABEL: f5:
+; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
+; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].w = vunpack(v[[V0]].h)
+; CHECK: vmem(r1+#0) = v[[V2]]
+define void @f5(<32 x i16>* %a0, <32 x i32>* %a1) #0 {
+ %v0 = load <32 x i16>, <32 x i16>* %a0, align 128
+ %v1 = sext <32 x i16> %v0 to <32 x i32>
+ store <32 x i32> %v1, <32 x i32>* %a1, align 128
+ ret void
+}
+
+; v8i8 -> v8i32
+; CHECK-LABEL: f6:
+; CHECK: r[[R0:[0-9]+]]:[[R1:[0-9]+]] = memd(r0+#0)
+; CHECK-DAG: v[[V0:[0-9]+]].w = vinsert(r[[R0]])
+; CHECK-DAG: v[[V0]].w = vinsert(r[[R1]])
+; CHECK-DAG: q[[Q0:[0-3]]] = vsetq
+; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
+; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
+; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V4]]
+define void @f6(<8 x i8>* %a0, <8 x i32>* %a1) #0 {
+ %v0 = load <8 x i8>, <8 x i8>* %a0, align 128
+ %v1 = sext <8 x i8> %v0 to <8 x i32>
+ store <8 x i32> %v1, <8 x i32>* %a1, align 128
+ ret void
+}
+
+attributes #0 = { "target-cpu"="hexagonv65" "target-features"="+hvx,+hvx-length128b,-packets" }
+