[PATCH] D71672: [AArch64] match splat of bitcasted extract subvector to DUPLANE

Wed Dec 18 12:53:34 PST 2019

spatel created this revision.
spatel added reviewers: efriedma, dmgreen, t.p.northover.
Herald added subscribers: hiraditya, kristof.beyls, mcrosier.
Herald added a project: LLVM.

This is another potential regression exposed by D63815 <https://reviews.llvm.org/D63815>.

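Here we peek through a bitcast to find an extract subvector and offset the splat lane by the extract index, scaled by the ratio of the element sizes:
splat (bitcast (extract X, C)), LaneC --> duplane (bitcast X), LaneC'
where LaneC' = LaneC + C * (element size of X / element size of the splat type).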


https://reviews.llvm.org/D71672

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll


Index: llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
===================================================================

--- llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -1663,8 +1663,7 @@
 define <2 x float> @test_vmul_laneq3_f32_bitcast(<2 x float> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmul_laneq3_f32_bitcast:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[1]
+; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[3]
 ; CHECK-NEXT:    ret
   %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
   %bc = bitcast <1 x double> %extract to <2 x float>
@@ -1676,8 +1675,7 @@
 define <2 x float> @test_vmul_laneq2_f32_bitcast(<2 x float> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmul_laneq2_f32_bitcast:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[0]
+; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[2]
 ; CHECK-NEXT:    ret
   %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
   %bc = bitcast <1 x double> %extract to <2 x float>
@@ -1689,8 +1687,7 @@
 define <4 x i16> @test_vmul_laneq5_i16_bitcast(<4 x i16> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmul_laneq5_i16_bitcast:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT:    dup v1.4h, v1.h[1]
+; CHECK-NEXT:    dup v1.4h, v1.h[5]
 ; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
 ; CHECK-NEXT:    ret
   %extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7043,7 +7043,22 @@
     // SelectionDAGBuilder may have "helpfully" already extracted or conatenated
     // to make a vector of the same size as this SHUFFLE. We can ignore the
     // extract entirely, and canonicalise the concat using WidenVector.
-    if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+    unsigned VTEltBitWidth = VT.getScalarSizeInBits();
+    if (V1.getOpcode() == ISD::BITCAST &&
+        V1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        V1.getOperand(0).getScalarValueSizeInBits() % VTEltBitWidth == 0) {
+      // If the extract is bitcast to smaller type, offset the DUPLANE index to
+      // account for that and bitcast the DUPLANE operand.
+      SDValue SrcOp = V1.getOperand(0);
+      unsigned ExtIdx = SrcOp.getConstantOperandVal(1);
+      unsigned Scale = SrcOp.getScalarValueSizeInBits() / VTEltBitWidth;
+      Lane += ExtIdx * Scale;
+      unsigned WideVecNumElts =
+          SrcOp.getOperand(0).getValueType().getVectorNumElements();
+      MVT CastVT = MVT::getVectorVT(VT.getSimpleVT().getScalarType(),
+                                    WideVecNumElts * Scale);
+      V1 = DAG.getBitcast(CastVT, SrcOp.getOperand(0));
+    } else if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
       Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
       V1 = V1.getOperand(0);
     } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D71672.234598.patch
Type: text/x-patch
Size: 3256 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191218/3fc2d081/attachment.bin>