[llvm] [IA]: Construct (de)interleave4 out of (de)interleave2 (PR #89276)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 2 04:21:14 PDT 2024
================
@@ -6,28 +6,35 @@
target triple = "aarch64-linux-gnu"
-define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2(ptr %ptr) {
-; NEON-LABEL: define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2
+define void @deinterleave_i8_factor2(ptr %ptr) {
+; NEON-LABEL: define void @deinterleave_i8_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
; NEON-NEXT: [[LDN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[PTR]])
-; NEON-NEXT: ret { <16 x i8>, <16 x i8> } [[LDN]]
+; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 0
+; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 1
+; NEON-NEXT: ret void
;
-; SVE-FIXED-LABEL: define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2
+; SVE-FIXED-LABEL: define void @deinterleave_i8_factor2
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; SVE-FIXED-NEXT: [[LOAD:%.*]] = load <32 x i8>, ptr [[PTR]], align 1
; SVE-FIXED-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.vector.deinterleave2.v32i8(<32 x i8> [[LOAD]])
-; SVE-FIXED-NEXT: ret { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]]
+; SVE-FIXED-NEXT: [[EXTRACT1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]], 0
+; SVE-FIXED-NEXT: [[EXTRACT2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]], 1
+; SVE-FIXED-NEXT: ret void
;
%load = load <32 x i8>, ptr %ptr, align 1
%deinterleave = tail call { <16 x i8>, <16 x i8> } @llvm.vector.deinterleave2.v32i8(<32 x i8> %load)
- ret { <16 x i8>, <16 x i8> } %deinterleave
+ %extract1 = extractvalue { <16 x i8>, <16 x i8> } %deinterleave, 0
+ %extract2 = extractvalue { <16 x i8>, <16 x i8> } %deinterleave, 1
+ ret void
}
define { <8 x i16>, <8 x i16> } @deinterleave_i16_factor2(ptr %ptr) {
; NEON-LABEL: define { <8 x i16>, <8 x i16> } @deinterleave_i16_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
-; NEON-NEXT: [[LDN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[PTR]])
-; NEON-NEXT: ret { <8 x i16>, <8 x i16> } [[LDN]]
+; NEON-NEXT: [[LOAD:%.*]] = load <16 x i16>, ptr [[PTR]], align 2
+; NEON-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <8 x i16>, <8 x i16> } @llvm.vector.deinterleave2.v16i16(<16 x i16> [[LOAD]])
+; NEON-NEXT: ret { <8 x i16>, <8 x i16> } [[DEINTERLEAVE]]
----------------
paulwalker-arm wrote:
The original tests show the `deinterleave(load())->ld2` transformation can be done when the results of the deinterleave are not explicitly extracted (via extractvalue). With your change, the transformation is only possible when the results of the deinterleave are explicitly extracted.
Personally, I don't see this as a problem at this stage because that's not the expected use case, and so I'm happy for the tests to be updated to show the transformation, as you did for `deinterleave_i8_factor2`.
https://github.com/llvm/llvm-project/pull/89276
More information about the llvm-commits
mailing list