[PATCH] D138791: [AArch64][SME]: Generate streaming-compatible code for ld2-alloca.

hassnaaHamdi via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 28 04:44:37 PST 2022


hassnaa-arm created this revision.
hassnaa-arm added reviewers: david-arm, sdesmalen.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
hassnaa-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

To generate code that is compatible with streaming mode:

- Disable lowering of interleaved loads to NEON, to avoid generating NEON intrinsics that are invalid in streaming mode.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D138791

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll


Index: llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -8,23 +8,37 @@
 define void @st1d_fixed(ptr %st_ptr) #0 {
 ; CHECK-LABEL: st1d_fixed:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #160
-; CHECK-NEXT:    .cfi_def_cfa_offset 160
-; CHECK-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_offset w29, -32
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
+; CHECK-NEXT:    sub sp, sp, #128
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 160 + 8 * VG
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x0, sp
-; CHECK-NEXT:    mov x20, sp
 ; CHECK-NEXT:    bl def
-; CHECK-NEXT:    ld2 { v0.2d, v1.2d }, [x20], #32
-; CHECK-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
-; CHECK-NEXT:    ld2 { v2.2d, v3.2d }, [x20]
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    sub x8, x8, #2
+; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [sp]
+; CHECK-NEXT:    mov w9, #2
+; CHECK-NEXT:    cmp x8, #2
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    add x10, sp, #128
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    add x9, sp, #128
+; CHECK-NEXT:    st1d { z0.d }, p0, [x10]
+; CHECK-NEXT:    ldr q2, [x9, x8]
 ; CHECK-NEXT:    stp q0, q2, [x19]
-; CHECK-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #160
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    add sp, sp, #128
+; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %alloc = alloca [16 x double]
   call void @def(ptr %alloc)
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13966,10 +13966,11 @@
   if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
     return false;
 
-  if (Subtarget->useSVEForFixedLengthVectors() &&
+  if (Subtarget->forceStreamingCompatibleSVE() ||
+      (Subtarget->useSVEForFixedLengthVectors() &&
       (VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
        (VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
-        isPowerOf2_32(NumElements) && VecSize > 128))) {
+        isPowerOf2_32(NumElements) && VecSize > 128)))) {
     UseScalable = true;
     return true;
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D138791.478192.patch
Type: text/x-patch
Size: 3258 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221128/368de91d/attachment.bin>


More information about the llvm-commits mailing list