[PATCH] D138791: [AArch64][SME]: Generate streaming-compatible code for ld2-alloca.
hassnaaHamdi via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 28 04:44:37 PST 2022
hassnaa-arm created this revision.
hassnaa-arm added reviewers: david-arm, sdesmalen.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
hassnaa-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
To generate code that is compatible with streaming mode:
- avoid lowering interleaved loads to NEON intrinsics, which are invalid in streaming mode; the scalable (SVE) lowering is used instead.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D138791
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
Index: llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -8,23 +8,37 @@
define void @st1d_fixed(ptr %st_ptr) #0 {
; CHECK-LABEL: st1d_fixed:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #160
-; CHECK-NEXT: .cfi_def_cfa_offset 160
-; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset w29, -32
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
+; CHECK-NEXT: sub sp, sp, #128
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 160 + 8 * VG
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: mov x20, sp
; CHECK-NEXT: bl def
-; CHECK-NEXT: ld2 { v0.2d, v1.2d }, [x20], #32
-; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
-; CHECK-NEXT: ld2 { v2.2d, v3.2d }, [x20]
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: sub x8, x8, #2
+; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp]
+; CHECK-NEXT: mov w9, #2
+; CHECK-NEXT: cmp x8, #2
+; CHECK-NEXT: csel x8, x8, x9, lo
+; CHECK-NEXT: add x10, sp, #128
+; CHECK-NEXT: lsl x8, x8, #3
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: add x9, sp, #128
+; CHECK-NEXT: st1d { z0.d }, p0, [x10]
+; CHECK-NEXT: ldr q2, [x9, x8]
; CHECK-NEXT: stp q0, q2, [x19]
-; CHECK-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #160
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #128
+; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret
%alloc = alloca [16 x double]
call void @def(ptr %alloc)
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13966,10 +13966,11 @@
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
- if (Subtarget->useSVEForFixedLengthVectors() &&
+ if (Subtarget->forceStreamingCompatibleSVE() ||
+ (Subtarget->useSVEForFixedLengthVectors() &&
(VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
(VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
- isPowerOf2_32(NumElements) && VecSize > 128))) {
+ isPowerOf2_32(NumElements) && VecSize > 128)))) {
UseScalable = true;
return true;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D138791.478192.patch
Type: text/x-patch
Size: 3258 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221128/368de91d/attachment.bin>
More information about the llvm-commits mailing list