[PATCH] D71672: [AArch64] match splat of bitcasted extract subvector to DUPLANE
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 18 12:53:34 PST 2019
spatel created this revision.
spatel added reviewers: efriedma, dmgreen, t.p.northover.
Herald added subscribers: hiraditya, kristof.beyls, mcrosier.
Herald added a project: LLVM.
This is another potential regression exposed by D63815 <https://reviews.llvm.org/D63815>.
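Here we peek through a bitcast to find an extract subvector, then scale the extract index by the ratio of the source element width to the splat element width and add it to the splat lane, so that the DUPLANE can read directly from the (bitcasted) original wide vector: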
splat (bitcast (extract X, C)), LaneC --> duplane (bitcast X), LaneC'
https://reviews.llvm.org/D71672
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
Index: llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -1663,8 +1663,7 @@
define <2 x float> @test_vmul_laneq3_f32_bitcast(<2 x float> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq3_f32_bitcast:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[1]
+; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[3]
; CHECK-NEXT: ret
%extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
%bc = bitcast <1 x double> %extract to <2 x float>
@@ -1676,8 +1675,7 @@
define <2 x float> @test_vmul_laneq2_f32_bitcast(<2 x float> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq2_f32_bitcast:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
+; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[2]
; CHECK-NEXT: ret
%extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
%bc = bitcast <1 x double> %extract to <2 x float>
@@ -1689,8 +1687,7 @@
define <4 x i16> @test_vmul_laneq5_i16_bitcast(<4 x i16> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq5_i16_bitcast:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: dup v1.4h, v1.h[1]
+; CHECK-NEXT: dup v1.4h, v1.h[5]
; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
; CHECK-NEXT: ret
%extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7043,7 +7043,22 @@
// SelectionDAGBuilder may have "helpfully" already extracted or conatenated
// to make a vector of the same size as this SHUFFLE. We can ignore the
// extract entirely, and canonicalise the concat using WidenVector.
- if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ unsigned VTEltBitWidth = VT.getScalarSizeInBits();
+ if (V1.getOpcode() == ISD::BITCAST &&
+ V1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ V1.getOperand(0).getScalarValueSizeInBits() % VTEltBitWidth == 0) {
+ // If the extract is bitcast to smaller type, offset the DUPLANE index to
+ // account for that and bitcast the DUPLANE operand.
+ SDValue SrcOp = V1.getOperand(0);
+ unsigned ExtIdx = SrcOp.getConstantOperandVal(1);
+ unsigned Scale = SrcOp.getScalarValueSizeInBits() / VTEltBitWidth;
+ Lane += ExtIdx * Scale;
+ unsigned WideVecNumElts =
+ SrcOp.getOperand(0).getValueType().getVectorNumElements();
+ MVT CastVT = MVT::getVectorVT(VT.getSimpleVT().getScalarType(),
+ WideVecNumElts * Scale);
+ V1 = DAG.getBitcast(CastVT, SrcOp.getOperand(0));
+ } else if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
V1 = V1.getOperand(0);
} else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D71672.234598.patch
Type: text/x-patch
Size: 3256 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191218/3fc2d081/attachment.bin>
More information about the llvm-commits mailing list