[llvm] 3c81757 - [Hexagon] Handle shifts of short vectors of i8
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 8 07:52:42 PDT 2022
Author: Krzysztof Parzyszek
Date: 2022-09-08T07:52:16-07:00
New Revision: 3c817574c2db9dbb003914dc40d58fe1dcfda855
URL: https://github.com/llvm/llvm-project/commit/3c817574c2db9dbb003914dc40d58fe1dcfda855
DIFF: https://github.com/llvm/llvm-project/commit/3c817574c2db9dbb003914dc40d58fe1dcfda855.diff
LOG: [Hexagon] Handle shifts of short vectors of i8
Added:
llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll
Modified:
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 94411b2e4f981..f2076a678669d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2345,7 +2345,59 @@ HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
SDValue
HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
-  return getVectorShiftByInt(Op, DAG);
+  // Custom lowering for vector shifts: rewrite the shift as VASL/VASR/VLSR by
+  // a scalar amount, doing the shift on i16 lanes when the elements are i8.
+  // Returns a null SDValue when no shift-by-scalar node could be formed.
+  const SDLoc &dl(Op);
+
+  // First try to convert the shift (by vector) to a shift by a scalar.
+  // If we first split the shift, the shift amount will become 'extract
+  // subvector', and will no longer be recognized as scalar.
+  SDValue Res = Op;
+  if (SDValue S = getVectorShiftByInt(Op, DAG))
+    Res = S;
+
+  // Test the opcode before the element type. If the conversion above failed,
+  // Res is still the original shift, and it has to be rejected for every
+  // element type; the code this replaces produced a null SDValue for it.
+  unsigned Opc = Res.getOpcode();
+  switch (Opc) {
+  case HexagonISD::VASR:
+  case HexagonISD::VLSR:
+  case HexagonISD::VASL:
+    break;
+  default:
+    // No instructions for shifts by non-scalars.
+    return SDValue();
+  }
+
+  // Elements other than i8 need nothing beyond the shift by scalar.
+  MVT ResTy = ty(Res);
+  if (ResTy.getVectorElementType() != MVT::i8)
+    return Res;
+
+  // For shifts of i8, extend the inputs to i16, then truncate back to i8.
+  assert(ResTy.getVectorElementType() == MVT::i8);
+  SDValue Val = Res.getOperand(0), Amt = Res.getOperand(1);
+
+  auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
+    MVT Ty = ty(V);
+    MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
+    // VASR gets sign-extended lanes; VLSR and VASL get zero-extended ones.
+    SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(V, dl, ExtTy)
+                                           : DAG.getZExtOrTrunc(V, dl, ExtTy);
+    SDValue ExtS = DAG.getNode(Opc, dl, ExtTy, {ExtV, A});
+    return DAG.getZExtOrTrunc(ExtS, dl, Ty);
+  };
+
+  // A 32-bit vector is shifted in one piece. Any other size is split with
+  // opSplit; the two parts are shifted separately and concatenated.
+  if (ResTy.getSizeInBits() == 32)
+    return ShiftPartI8(Opc, Val, Amt);
+
+  auto [LoV, HiV] = opSplit(Val, dl, DAG);
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy,
+                     {ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
}
SDValue
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll b/llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll
new file mode 100644
index 0000000000000..db1c454cd7645
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll
@@ -0,0 +1,239 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+define <4 x i8> @f0(<4 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f0:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vsxtbh(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vasrh(r1:0,#1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunehb(r1:0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = ashr <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1> ; arithmetic shift of four i8 lanes by a splat of 1; expected code sign-extends to halfwords (vsxtbh), shifts (vasrh), then narrows (vtrunehb)
+ ret <4 x i8> %v0
+}
+
+define <4 x i8> @f1(<4 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vzxtbh(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vlsrh(r1:0,#1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunehb(r1:0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = lshr <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1> ; logical right shift of four i8 lanes by a splat of 1; expected code zero-extends to halfwords (vzxtbh), shifts (vlsrh), then narrows (vtrunehb)
+ ret <4 x i8> %v0
+}
+
+define <4 x i8> @f2(<4 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vzxtbh(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vaslh(r1:0,#1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunehb(r1:0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = shl <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1> ; left shift of four i8 lanes by a splat of 1; expected code zero-extends to halfwords (vzxtbh), shifts (vaslh), then narrows (vtrunehb)
+ ret <4 x i8> %v0
+}
+
+
+define <8 x i8> @f3(<8 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vsxtbh(r1)
+; CHECK-NEXT: r5:4 = vsxtbh(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vasrh(r5:4,#1)
+; CHECK-NEXT: r3:2 = vasrh(r3:2,#1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunehb(r1:0)
+; CHECK-NEXT: r1 = vtrunehb(r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = ashr <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; arithmetic shift of eight i8 lanes by a splat of 1; expected code handles r0 and r1 separately: vsxtbh, vasrh, vtrunehb on each
+ ret <8 x i8> %v0
+}
+
+define <8 x i8> @f4(<8 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f4:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vzxtbh(r1)
+; CHECK-NEXT: r5:4 = vzxtbh(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vlsrh(r5:4,#1)
+; CHECK-NEXT: r3:2 = vlsrh(r3:2,#1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunehb(r1:0)
+; CHECK-NEXT: r1 = vtrunehb(r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = lshr <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; logical right shift of eight i8 lanes by a splat of 1; expected code handles r0 and r1 separately: vzxtbh, vlsrh, vtrunehb on each
+ ret <8 x i8> %v0
+}
+
+define <8 x i8> @f5(<8 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f5:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vzxtbh(r1)
+; CHECK-NEXT: r5:4 = vzxtbh(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vaslh(r5:4,#1)
+; CHECK-NEXT: r3:2 = vaslh(r3:2,#1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunehb(r1:0)
+; CHECK-NEXT: r1 = vtrunehb(r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = shl <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; left shift of eight i8 lanes by a splat of 1; expected code handles r0 and r1 separately: vzxtbh, vaslh, vtrunehb on each
+ ret <8 x i8> %v0
+}
+
+
+define <2 x i16> @f6(<2 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f6:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vasrh(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = ashr <2 x i16> %a0, <i16 1, i16 1> ; arithmetic shift of two i16 lanes by a splat of 1; expected code is a single vasrh with no extend or truncate
+ ret <2 x i16> %v0
+}
+
+define <2 x i16> @f7(<2 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f7:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vlsrh(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = lshr <2 x i16> %a0, <i16 1, i16 1> ; logical right shift of two i16 lanes by a splat of 1; expected code is a single vlsrh with no extend or truncate
+ ret <2 x i16> %v0
+}
+
+define <2 x i16> @f8(<2 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f8:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vaslh(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = shl <2 x i16> %a0, <i16 1, i16 1> ; left shift of two i16 lanes by a splat of 1; expected code is a single vaslh with no extend or truncate
+ ret <2 x i16> %v0
+}
+
+
+define <4 x i16> @f9(<4 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f9:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vasrh(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = ashr <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1> ; arithmetic shift of four i16 lanes by a splat of 1; expected code is a single vasrh on the r1:0 pair
+ ret <4 x i16> %v0
+}
+
+define <4 x i16> @f10(<4 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f10:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vlsrh(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = lshr <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1> ; logical right shift of four i16 lanes by a splat of 1; expected code is a single vlsrh on the r1:0 pair
+ ret <4 x i16> %v0
+}
+
+define <4 x i16> @f11(<4 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f11:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vaslh(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = shl <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1> ; left shift of four i16 lanes by a splat of 1; expected code is a single vaslh on the r1:0 pair
+ ret <4 x i16> %v0
+}
+
+
+define <2 x i32> @f12(<2 x i32> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f12:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vasrw(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = ashr <2 x i32> %a0, <i32 1, i32 1> ; arithmetic shift of two i32 lanes by a splat of 1; expected code is a single word shift, vasrw
+ ret <2 x i32> %v0
+}
+
+define <2 x i32> @f13(<2 x i32> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f13:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vlsrw(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = lshr <2 x i32> %a0, <i32 1, i32 1> ; logical right shift of two i32 lanes by a splat of 1; expected code is a single word shift, vlsrw
+ ret <2 x i32> %v0
+}
+
+define <2 x i32> @f14(<2 x i32> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f14:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vaslw(r1:0,#1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+ %v0 = shl <2 x i32> %a0, <i32 1, i32 1> ; left shift of two i32 lanes by a splat of 1; expected code is a single word shift, vaslw
+ ret <2 x i32> %v0
+}
+
+attributes #0 = { nounwind } ; attribute group #0, referenced by each function definition in this test
More information about the llvm-commits
mailing list