[llvm] [IA]: Construct (de)interleave4 out of (de)interleave2 (PR #89276)

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 30 09:56:17 PDT 2024


================
@@ -6,28 +6,35 @@
 
 target triple = "aarch64-linux-gnu"
 
-define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2(ptr %ptr) {
-; NEON-LABEL: define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2
+define void @deinterleave_i8_factor2(ptr %ptr) {
+; NEON-LABEL: define void @deinterleave_i8_factor2
 ; NEON-SAME: (ptr [[PTR:%.*]]) {
 ; NEON-NEXT:    [[LDN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[PTR]])
-; NEON-NEXT:    ret { <16 x i8>, <16 x i8> } [[LDN]]
+; NEON-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 0
+; NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 1
+; NEON-NEXT:    ret void
 ;
-; SVE-FIXED-LABEL: define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2
+; SVE-FIXED-LABEL: define void @deinterleave_i8_factor2
 ; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; SVE-FIXED-NEXT:    [[LOAD:%.*]] = load <32 x i8>, ptr [[PTR]], align 1
 ; SVE-FIXED-NEXT:    [[DEINTERLEAVE:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.vector.deinterleave2.v32i8(<32 x i8> [[LOAD]])
-; SVE-FIXED-NEXT:    ret { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]]
+; SVE-FIXED-NEXT:    [[EXTRACT1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]], 0
+; SVE-FIXED-NEXT:    [[EXTRACT2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]], 1
+; SVE-FIXED-NEXT:    ret void
 ;
   %load = load <32 x i8>, ptr %ptr, align 1
   %deinterleave = tail call { <16 x i8>, <16 x i8> } @llvm.vector.deinterleave2.v32i8(<32 x i8> %load)
-  ret { <16 x i8>, <16 x i8> } %deinterleave
+  %extract1 = extractvalue { <16 x i8>, <16 x i8> } %deinterleave, 0
+  %extract2 = extractvalue { <16 x i8>, <16 x i8> } %deinterleave, 1
+  ret void
 }
 
 define { <8 x i16>, <8 x i16> } @deinterleave_i16_factor2(ptr %ptr) {
 ; NEON-LABEL: define { <8 x i16>, <8 x i16> } @deinterleave_i16_factor2
 ; NEON-SAME: (ptr [[PTR:%.*]]) {
-; NEON-NEXT:    [[LDN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[PTR]])
-; NEON-NEXT:    ret { <8 x i16>, <8 x i16> } [[LDN]]
+; NEON-NEXT:    [[LOAD:%.*]] = load <16 x i16>, ptr [[PTR]], align 2
+; NEON-NEXT:    [[DEINTERLEAVE:%.*]] = tail call { <8 x i16>, <8 x i16> } @llvm.vector.deinterleave2.v16i16(<16 x i16> [[LOAD]])
+; NEON-NEXT:    ret { <8 x i16>, <8 x i16> } [[DEINTERLEAVE]]
----------------
paulwalker-arm wrote:

This suggests you'll need to update more of the tests, as you have above, by adding instances of `extractvalue`.

At some point we might want to restore the original behaviour, but I don't think the way these tests are currently written matches the intent of the original work, and I'm happy to lose this bit of flexibility given the bugs this PR fixes alongside the new factor4 support.

https://github.com/llvm/llvm-project/pull/89276


More information about the llvm-commits mailing list