[llvm] 83288f8 - [AArch64] Custom lower `ISD::ZERO_EXTEND_VECTOR_INREG`
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 26 11:54:34 PST 2022
Author: Roman Lebedev
Date: 2022-12-26T22:54:03+03:00
New Revision: 83288f8063377f03cbcf3e89c940d2a62c855a96
URL: https://github.com/llvm/llvm-project/commit/83288f8063377f03cbcf3e89c940d2a62c855a96
DIFF: https://github.com/llvm/llvm-project/commit/83288f8063377f03cbcf3e89c940d2a62c855a96.diff
LOG: [AArch64] Custom lower `ISD::ZERO_EXTEND_VECTOR_INREG`
The baseline legalization for `ISD::ZERO_EXTEND_VECTOR_INREG`
(`VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG`)
blends in the zeros, but, as mentioned e.g.
in b4bd0a404fe26071dab0854dfd9767974909c7c4,
there is no such blend operation on AArch64.
So some of the shuffles that would otherwise be lowered nicely
by `LowerVECTOR_SHUFFLE()`, e.g. into `ZIP1`,
become unrecognizable after round-tripping
through `ISD::ZERO_EXTEND_VECTOR_INREG` recognition & legalization.
The most obvious solution is to custom-lower
`ISD::ZERO_EXTEND_VECTOR_INREG` as the `ZIP1`-with-zeros
that the shuffle in that test case would originally have become.
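
For context, here is a rough sketch of the blend-style expansion strategy the
generic legalizer uses, which the message above refers to. This is not the
verbatim VectorLegalizer code: the helper name is made up, big-endian lane
ordering is ignored, and details are simplified for illustration.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch: expand ZERO_EXTEND_VECTOR_INREG by shuffling the narrow source
// against an all-zeros vector so each source element is padded out with zero
// lanes, then bitcasting to the wide result type (little-endian assumed).
static SDValue expandZExtVectorInRegSketch(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);      // e.g. v4i16
  SDValue Src = N->getOperand(0);   // e.g. v8i8
  EVT SrcVT = Src.getValueType();
  int NumSrcElts = SrcVT.getVectorNumElements();
  int Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();

  SDValue Zeros = DAG.getConstant(0, DL, SrcVT);
  SmallVector<int, 16> Mask;
  for (int i = 0; i != NumSrcElts / Scale; ++i) {
    Mask.push_back(i);              // the narrow source element ...
    for (int j = 1; j != Scale; ++j)
      Mask.push_back(NumSrcElts);   // ... followed by zero padding lanes
  }
  SDValue Shuffle = DAG.getVectorShuffle(SrcVT, DL, Src, Zeros, Mask);
  return DAG.getBitcast(VT, Shuffle);
}

After that round trip the shuffle-with-zeros may no longer be matched as a
`ZIP1` by `LowerVECTOR_SHUFFLE()`, which is what the custom lowering added
below avoids.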
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2627e447a4816..d65693f4391bb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1554,6 +1554,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -5919,6 +5920,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
@@ -11443,6 +11446,27 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
}
+// Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend in zeros,
+// but we don't have an appropriate instruction,
+// so custom-lower it as ZIP1-with-zeros.
+SDValue
+AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ SDValue SrcOp = Op.getOperand(0);
+ EVT SrcVT = SrcOp.getValueType();
+ assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
+ "Unexpected extension factor.");
+ unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
+ // FIXME: support multi-step zipping?
+ if (Scale != 2)
+ return SDValue();
+ SDValue Zeros = DAG.getConstant(0, dl, SrcVT);
+ return DAG.getBitcast(VT,
+ DAG.getNode(AArch64ISD::ZIP1, dl, SrcVT, SrcOp, Zeros));
+}
+
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1eac0afea3953..e62a1bf2ec0b0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1039,6 +1039,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-vuzp.ll b/llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
index 10326997938c2..c1d9caebb1d2c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
@@ -59,10 +59,8 @@ entry:
; Check that this pattern is recognized as a VZIP and
; that the vector blend transform does not scramble the pattern.
-; FIXME: we can not recognize generic ZERO_EXTEND_VECTOR_INREG legalization
-; as a zip1.
; CHECK-LABEL: vzipNoBlend:
-; CHECK-NOT: zip1
+; CHECK: zip1
define <8 x i8> @vzipNoBlend(ptr %A, ptr %B) nounwind {
%t = load <8 x i8>, ptr %A
%vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -78,9 +76,8 @@ define <8 x i8> @vzipNoBlendCommutted(ptr %A, ptr %B) nounwind {
ret <8 x i8> %vzip
}
-; FIXME: this is identical to @vzipNoBlend
; CHECK-LABEL: vzipStillZExt:
-; CHECK-NOT: zip1
+; CHECK: zip1
define <8 x i8> @vzipStillZExt(ptr %A, ptr %B) nounwind {
%t = load <8 x i8>, ptr %A
%vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 9, i32 1, i32 9, i32 2, i32 9, i32 3, i32 9>