[PATCH] D113270: [SVE][CodeGen] Improve codegen for some FP insert_subvector cases

Fri Nov 5 05:07:28 PDT 2021

david-arm created this revision.
david-arm added reviewers: sdesmalen, CarolineConcatto, bsmith.
Herald added subscribers: ctetreau, psnobl, hiraditya, tschuett.
Herald added a reviewer: efriedma.
david-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

When inserting an unpacked FP subvector into a packed vector we
can simply cast the unpacked value into a packed value, since
both types are legal for SVE. We can then use this as the input
for the UZP instruction. This avoids us expanding the operation
by going through the stack.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D113270

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/sve-insert-vector.ll


Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll
===================================================================

--- llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -424,14 +424,7 @@
 define <vscale x 3 x float> @insert_nxv3f32_nxv2f32(<vscale x 2 x float> %sv0) nounwind {
 ; CHECK-LABEL: insert_nxv3f32_nxv2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    st1w { z0.d }, p0, [sp]
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
-; CHECK-NEXT:    addvl sp, sp, #1
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    ret
   %v0 = call <vscale x 3 x float> @llvm.experimental.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float> undef, <vscale x 2 x float> %sv0, i64 0)
  ret <vscale x 3 x float> %v0
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10912,7 +10912,7 @@
     SDLoc DL(Op);
     EVT VT = Op.getValueType();
 
-    if (!isTypeLegal(VT) || !VT.isInteger())
+    if (!isTypeLegal(VT))
       return SDValue();
 
     SDValue Vec0 = Op.getOperand(0);
@@ -10922,9 +10922,19 @@
     if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
       return SDValue();
 
-    // Extend elements of smaller vector...
-    EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
-    SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+    EVT WideVT;
+    SDValue ExtVec;
+
+    if (VT.isFloatingPoint()) {
+      // The InVT type should be legal. We can safely cast the unpacked
+      // subvector from InVT -> VT.
+      WideVT = VT;
+      ExtVec = getSVESafeBitCast(VT, Vec1, DAG);
+    } else {
+      // Extend elements of smaller vector...
+      WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
+      ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+    }
 
     if (Idx == 0) {
       SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D113270.385031.patch
Type: text/x-patch
Size: 2296 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211105/0172e803/attachment.bin>