[llvm] f528816 - [Hexagon] Move selection of HVX multiply from lowering to patterns
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 2 14:04:50 PDT 2020
Author: Krzysztof Parzyszek
Date: 2020-10-02T16:04:34-05:00
New Revision: f528816d586a42e4cf27af5b2fa9ba91213307aa
URL: https://github.com/llvm/llvm-project/commit/f528816d586a42e4cf27af5b2fa9ba91213307aa
DIFF: https://github.com/llvm/llvm-project/commit/f528816d586a42e4cf27af5b2fa9ba91213307aa.diff
LOG: [Hexagon] Move selection of HVX multiply from lowering to patterns
Also, change the i32*i32 multiply to use V6_vmpyieoh + V6_vmpyiewuh_acc,
which works on V60 as well, so the separate V60 and V62 sequences are
no longer needed.
Added:
Modified:
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
llvm/test/CodeGen/Hexagon/autohvx/arith.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index e87ef08d8ed5..ee200b32ae77 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -91,6 +91,7 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::XOR, T, Legal);
setOperationAction(ISD::ADD, T, Legal);
setOperationAction(ISD::SUB, T, Legal);
+ setOperationAction(ISD::MUL, T, Legal);
setOperationAction(ISD::CTPOP, T, Legal);
setOperationAction(ISD::CTLZ, T, Legal);
if (T != ByteV) {
@@ -103,7 +104,6 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::LOAD, T, Custom);
setOperationAction(ISD::MLOAD, T, Custom);
setOperationAction(ISD::MSTORE, T, Custom);
- setOperationAction(ISD::MUL, T, Custom);
setOperationAction(ISD::MULHS, T, Custom);
setOperationAction(ISD::MULHU, T, Custom);
setOperationAction(ISD::BUILD_VECTOR, T, Custom);
@@ -1444,73 +1444,6 @@ HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
{VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
}
-SDValue
-HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
- MVT ResTy = ty(Op);
- assert(ResTy.isVector() && isHvxSingleTy(ResTy));
- const SDLoc &dl(Op);
- SmallVector<int,256> ShuffMask;
-
- MVT ElemTy = ResTy.getVectorElementType();
- unsigned VecLen = ResTy.getVectorNumElements();
- SDValue Vs = Op.getOperand(0);
- SDValue Vt = Op.getOperand(1);
-
- switch (ElemTy.SimpleTy) {
- case MVT::i8: {
- // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
- // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
- // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
- MVT ExtTy = typeExtElem(ResTy, 2);
- unsigned MpyOpc = ElemTy == MVT::i8 ? Hexagon::V6_vmpybv
- : Hexagon::V6_vmpyhv;
- SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);
-
- // Discard high halves of the resulting values, collect the low halves.
- for (unsigned I = 0; I < VecLen; I += 2) {
- ShuffMask.push_back(I); // Pick even element.
- ShuffMask.push_back(I+VecLen); // Pick odd element.
- }
- VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
- SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
- return DAG.getBitcast(ResTy, BS);
- }
- case MVT::i16:
- // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode.
- // (There is also V6_vmpyhv, which behaves in an analogous way to
- // V6_vmpybv.)
- return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG);
- case MVT::i32: {
- auto MulL_V60 = [&](SDValue Vs, SDValue Vt) {
- // Use the following sequence for signed word multiply:
- // T0 = V6_vmpyiowh Vs, Vt
- // T1 = V6_vaslw T0, 16
- // T2 = V6_vmpyiewuh_acc T1, Vs, Vt
- SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
- SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG);
- SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG);
- SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
- {T1, Vs, Vt}, DAG);
- return T2;
- };
- auto MulL_V62 = [&](SDValue Vs, SDValue Vt) {
- MVT PairTy = typeJoin({ResTy, ResTy});
- SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy,
- {Vs, Vt}, DAG);
- SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
- {T0, Vs, Vt}, DAG);
- return opSplit(T1, dl, DAG).first;
- };
- if (Subtarget.useHVXV62Ops())
- return MulL_V62(Vs, Vt);
- return MulL_V60(Vs, Vt);
- }
- default:
- break;
- }
- return SDValue();
-}
-
SDValue
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
MVT ResTy = ty(Op);
@@ -2100,7 +2033,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerHvxShift(Op, DAG);
- case ISD::MUL: return LowerHvxMul(Op, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerHvxMulh(Op, DAG);
case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index b84c6eb27fe2..c03e1c792583 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -316,6 +316,20 @@ let Predicates = [UseHVX] in {
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}
+let Predicates = [UseHVX] in {
+ // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
+ // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
+ // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
+ def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
+ (V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
+ (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;
+ def: Pat<(mul HVI16:$Vs, HVI16:$Vt),
+ (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
+ (V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
+ HvxVR:$Vs, HvxVR:$Vt)>;
+}
+
let Predicates = [UseHVX] in {
def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
index 99e287dce214..348f3dd1df05 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
@@ -223,16 +223,16 @@ define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
; --- mul
; CHECK-LABEL: mpyb_64:
-; CHECK: v[[H00:[0-9]+]]:[[L00:[0-9]+]].h = vmpy(v0.b,v1.b)
-; CHECK: vshuffe(v[[H00]].b,v[[L00]].b)
+; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
+; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
%p = mul <64 x i8> %v0, %v1
ret <64 x i8> %p
}
; CHECK-LABEL: mpyb_128:
-; CHECK: v[[H10:[0-9]+]]:[[L10:[0-9]+]].h = vmpy(v0.b,v1.b)
-; CHECK: vshuffe(v[[H10]].b,v[[L10]].b)
+; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
+; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
%p = mul <128 x i8> %v0, %v1
ret <128 x i8> %p
@@ -252,43 +252,21 @@ define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
ret <64 x i16> %p
}
-; CHECK-LABEL: mpyw_64_v60:
-; CHECK-DAG: r[[T00:[0-9]+]] = #16
-; CHECK-DAG: v[[T01:[0-9]+]].w = vmpyio(v0.w,v1.h)
-; CHECK: v[[T02:[0-9]+]].w = vasl(v[[T01]].w,r[[T00]])
-; CHECK: v[[T02]].w += vmpyie(v0.w,v1.uh)
-define <16 x i32> @mpyw_64_v60(<16 x i32> %v0, <16 x i32> %v1) #0 {
+; CHECK-LABEL: mpyw_64:
+; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
+; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
+define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
%p = mul <16 x i32> %v0, %v1
ret <16 x i32> %p
}
-; CHECK-LABEL: mpyw_128_v60:
-; CHECK-DAG: r[[T10:[0-9]+]] = #16
-; CHECK-DAG: v[[T11:[0-9]+]].w = vmpyio(v0.w,v1.h)
-; CHECK: v[[T12:[0-9]+]].w = vasl(v[[T11]].w,r[[T10]])
-; CHECK: v[[T12]].w += vmpyie(v0.w,v1.uh)
-define <32 x i32> @mpyw_128_v60(<32 x i32> %v0, <32 x i32> %v1) #1 {
- %p = mul <32 x i32> %v0, %v1
- ret <32 x i32> %p
-}
-
-; CHECK-LABEL: mpyw_64_v62:
-; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh)
-; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h)
-define <16 x i32> @mpyw_64_v62(<16 x i32> %v0, <16 x i32> %v1) #3 {
- %p = mul <16 x i32> %v0, %v1
- ret <16 x i32> %p
-}
-
-; CHECK-LABEL: mpyw_128_v62:
-; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh)
-; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h)
-define <32 x i32> @mpyw_128_v62(<32 x i32> %v0, <32 x i32> %v1) #4 {
+; CHECK-LABEL: mpyw_128:
+; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
+; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
+define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
%p = mul <32 x i32> %v0, %v1
ret <32 x i32> %p
}
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
-attributes #3 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length64b" }
-attributes #4 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length128b" }
More information about the llvm-commits
mailing list