[llvm] b3bd0c3 - [SystemZ] Eliminate the need to create a zero vector by reusing the VPERM mask.
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Tue May 19 00:38:18 PDT 2020
Author: Jonas Paulsson
Date: 2020-05-19T09:37:19+02:00
New Revision: b3bd0c37eced4d149b44eaa9a2700c073927b543
URL: https://github.com/llvm/llvm-project/commit/b3bd0c37eced4d149b44eaa9a2700c073927b543
DIFF: https://github.com/llvm/llvm-project/commit/b3bd0c37eced4d149b44eaa9a2700c073927b543.diff
LOG: [SystemZ] Eliminate the need to create a zero vector by reusing the VPERM mask.
Try to avoid creating VGBMs by reusing the permutation mask if it contains a
zero. If the first byte of the result is taken from (any byte of) a zero
vector, then the first byte of the mask can be set to zero and reused by also
placing the mask as the first operand. If instead some byte of the result is
taken from the first byte of the other source operand, then that zero index in
the mask can be reused by placing the mask as the second operand.
Review: Ulrich Weigand
Differential Revision: https://reviews.llvm.org/D79925
Added:
Modified:
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/test/CodeGen/SystemZ/vec-perm-14.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 7a8b5249255f..59896d628816 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4450,6 +4450,13 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
return Op;
}
+static bool isZeroVector(SDValue N) { // Returns true if N is recognized as an all-zeros vector.
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) // A splat is decided by its scalar operand,
+ if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0))) // but only when that operand is a constant.
+ return Op->getZExtValue() == 0;
+ return ISD::isBuildVectorAllZeros(N.getNode()); // Non-constant splats and all other nodes are left to this check.
+}
+
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
// VSLDB or VPERM.
@@ -4466,7 +4473,54 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
Ops[OpNo1],
DAG.getTargetConstant(StartIndex, DL, MVT::i32));
- // Fall back on VPERM. Construct an SDNode for the permute vector.
+ // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
+ // eliminate a zero vector by reusing any zero index in the permute vector.
+ unsigned ZeroVecIdx =
+ isZeroVector(Ops[0]) ? 0 : (isZeroVector(Ops[1]) ? 1 : UINT_MAX);
+ if (ZeroVecIdx != UINT_MAX) {
+ bool MaskFirst = true;
+ int ZeroIdx = -1;
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
+ if (OpNo == ZeroVecIdx && I == 0) {
+ // If the first byte is zero, use mask as first operand.
+ ZeroIdx = 0;
+ break;
+ }
+ if (OpNo != ZeroVecIdx && Byte == 0) {
+ // If mask contains a zero, use it by placing that vector first.
+ ZeroIdx = I + SystemZ::VectorBytes;
+ MaskFirst = false;
+ break;
+ }
+ }
+ if (ZeroIdx != -1) {
+ SDValue IndexNodes[SystemZ::VectorBytes];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ if (Bytes[I] >= 0) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
+ if (OpNo == ZeroVecIdx)
+ IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
+ else {
+ unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
+ IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
+ }
+ } else
+ IndexNodes[I] = DAG.getUNDEF(MVT::i32);
+ }
+ SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
+ SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
+ if (MaskFirst)
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
+ Mask);
+ else
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
+ Mask);
+ }
+ }
+
SDValue IndexNodes[SystemZ::VectorBytes];
for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
if (Bytes[I] >= 0)
diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-14.ll b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll
index 0cf3c6ef7a06..3678969ac897 100644
--- a/llvm/test/CodeGen/SystemZ/vec-perm-14.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll
@@ -3,7 +3,7 @@
; Test that only one vperm of the vector compare is needed for both extracts.
define void @fun() {
-; CHECK-LABEL: fun
+; CHECK-LABEL: fun:
; CHECK: vperm
; CHECK-NOT: vperm
bb:
@@ -25,3 +25,74 @@ bb3:
bb4:
unreachable
}
+
+; Test that a zero index in the permute vector is used instead of VGBM, with
+; a zero index into the other source operand.
+define <4 x i8> @fun1(<2 x i8> %arg) {
+; CHECK-LABEL:.LCPI1_0:
+; CHECK-NEXT: .byte 1 # 0x1
+; CHECK-NEXT: .byte 18 # 0x12
+; CHECK-NEXT: .byte 0 # 0x0
+; CHECK-NEXT: .byte 18 # 0x12
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .text
+; CHECK-NEXT: .globl fun1
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .type fun1,@function
+; CHECK-NEXT: fun1: # @fun1
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI1_0
+; CHECK-NEXT: vl %v0, 0(%r1), 3
+; CHECK-NEXT: vperm %v24, %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 0, i32 3>
+ ret <4 x i8> %res
+}
+
+; Same, but with the first byte indexing into an element of the zero vector.
+define <4 x i8> @fun2(<2 x i8> %arg) {
+; CHECK-LABEL:.LCPI2_0:
+; CHECK-NEXT: .byte 0 # 0x0
+; CHECK-NEXT: .byte 17 # 0x11
+; CHECK-NEXT: .byte 17 # 0x11
+; CHECK-NEXT: .byte 0 # 0x0
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .text
+; CHECK-NEXT: .globl fun2
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .type fun2,@function
+; CHECK-NEXT:fun2: # @fun2
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT:# %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI2_0
+; CHECK-NEXT: vl %v0, 0(%r1), 3
+; CHECK-NEXT: vperm %v24, %v0, %v24, %v0
+; CHECK-NEXT: br %r14
+ %res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
+ <4 x i32> <i32 3, i32 1, i32 1, i32 2>
+ ret <4 x i8> %res
+}
More information about the llvm-commits
mailing list