[llvm] [LLVM][DAGCombiner] Extend coverage for insert_subv(undef, extract_subv(A, 0), 0) (PR #95242)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 12 06:02:12 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

<details>
<summary>Changes</summary>

There is an existing combine that removes the need for the extract_subv, but it currently requires matching vector types (all fixed or all scalable).

The combine doesn't need this restriction, so I've changed it to use ValueType's `knownBitsGE`/`knownBitsLE` comparisons, which support mixed vector types. In doing so we also need extra guards to prevent invalid operations (e.g. extracting a scalable vector from a fixed-length vector).

---

Patch is 80.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95242.diff


8 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+5-4) 
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll (+128-126) 
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+144-166) 
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+14-18) 
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+170-172) 
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll (+66-68) 
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll (+9-10) 
- (added) llvm/test/CodeGen/AArch64/vector-insert-dag-combines.ll (+310) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4fcbe08e4b2b9..8ea09a5744322 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26417,12 +26417,13 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
       return N1.getOperand(0);
     // TODO: To remove the zero check, need to adjust the offset to
     // a multiple of the new src type.
-    if (isNullConstant(N2) &&
-        VT.isScalableVector() == SrcVT.isScalableVector()) {
-      if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
+    if (isNullConstant(N2)) {
+      if (VT.knownBitsGE(SrcVT) &&
+          !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
         return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                            VT, N0, N1.getOperand(0), N2);
-      else
+      else if (VT.knownBitsLE(SrcVT) &&
+               !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
                            VT, N1.getOperand(0), N2);
     }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 0d92a6fa0fa28..a206fbc510295 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -519,32 +519,32 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) {
 define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzu_v8f16_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    mov z1.h, z0.h[1]
-; CHECK-NEXT:    mov z2.h, z0.h[3]
-; CHECK-NEXT:    mov z3.h, z0.h[2]
-; CHECK-NEXT:    fcvtzu x8, h0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    fcvtzu x9, h1
-; CHECK-NEXT:    fcvtzu x10, h2
-; CHECK-NEXT:    fcvtzu x11, h3
-; CHECK-NEXT:    mov z1.h, z0.h[1]
-; CHECK-NEXT:    mov z2.h, z0.h[3]
+; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    fcvtzu x12, h0
-; CHECK-NEXT:    mov z0.h, z0.h[2]
-; CHECK-NEXT:    stp x8, x9, [sp, #32]
+; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
+; CHECK-NEXT:    mov z2.h, z1.h[1]
 ; CHECK-NEXT:    fcvtzu x8, h1
+; CHECK-NEXT:    mov z3.h, z1.h[3]
+; CHECK-NEXT:    mov z1.h, z1.h[2]
 ; CHECK-NEXT:    fcvtzu x9, h2
-; CHECK-NEXT:    stp x11, x10, [sp, #48]
+; CHECK-NEXT:    mov z2.h, z0.h[1]
+; CHECK-NEXT:    fcvtzu x10, h3
+; CHECK-NEXT:    mov z3.h, z0.h[3]
+; CHECK-NEXT:    fcvtzu x11, h1
+; CHECK-NEXT:    mov z0.h, z0.h[2]
+; CHECK-NEXT:    stp x8, x9, [sp, #-64]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    fcvtzu x8, h2
+; CHECK-NEXT:    fcvtzu x9, h3
+; CHECK-NEXT:    stp x11, x10, [sp, #16]
 ; CHECK-NEXT:    fcvtzu x10, h0
-; CHECK-NEXT:    ldp q2, q3, [sp, #32]
-; CHECK-NEXT:    stp x12, x8, [sp]
-; CHECK-NEXT:    stp x10, x9, [sp, #16]
-; CHECK-NEXT:    ldp q1, q0, [sp]
-; CHECK-NEXT:    stp q2, q3, [x1]
-; CHECK-NEXT:    stp q1, q0, [x1, #32]
+; CHECK-NEXT:    ldp q2, q3, [sp]
+; CHECK-NEXT:    stp x12, x8, [sp, #32]
+; CHECK-NEXT:    stp x10, x9, [sp, #48]
+; CHECK-NEXT:    ldp q1, q0, [sp, #32]
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q1, q0, [x1]
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
 ;
@@ -598,55 +598,56 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
 define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzu_v16f16_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    mov z2.h, z1.h[1]
-; CHECK-NEXT:    mov z3.h, z1.h[3]
-; CHECK-NEXT:    mov z4.h, z1.h[2]
-; CHECK-NEXT:    fcvtzu x8, h1
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    mov z5.h, z0.h[3]
-; CHECK-NEXT:    fcvtzu x10, h0
-; CHECK-NEXT:    fcvtzu x9, h2
-; CHECK-NEXT:    fcvtzu x11, h3
-; CHECK-NEXT:    fcvtzu x12, h4
-; CHECK-NEXT:    mov z2.h, z1.h[1]
-; CHECK-NEXT:    mov z4.h, z1.h[3]
-; CHECK-NEXT:    fcvtzu x13, h1
-; CHECK-NEXT:    mov z1.h, z1.h[2]
-; CHECK-NEXT:    mov z3.h, z0.h[1]
-; CHECK-NEXT:    stp x8, x9, [sp, #32]
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    ext z2.b, z2.b, z1.b, #8
+; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT:    mov z4.h, z2.h[1]
 ; CHECK-NEXT:    fcvtzu x8, h2
+; CHECK-NEXT:    mov z5.h, z2.h[3]
+; CHECK-NEXT:    mov z2.h, z2.h[2]
+; CHECK-NEXT:    fcvtzu x12, h3
 ; CHECK-NEXT:    fcvtzu x9, h4
-; CHECK-NEXT:    stp x12, x11, [sp, #48]
+; CHECK-NEXT:    mov z4.h, z3.h[1]
+; CHECK-NEXT:    fcvtzu x10, h5
+; CHECK-NEXT:    mov z5.h, z3.h[3]
+; CHECK-NEXT:    fcvtzu x11, h2
+; CHECK-NEXT:    mov z2.h, z3.h[2]
+; CHECK-NEXT:    stp x8, x9, [sp, #-128]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    fcvtzu x8, h4
+; CHECK-NEXT:    fcvtzu x9, h5
+; CHECK-NEXT:    stp x11, x10, [sp, #16]
+; CHECK-NEXT:    fcvtzu x10, h2
+; CHECK-NEXT:    mov z3.h, z1.h[1]
+; CHECK-NEXT:    mov z4.h, z1.h[3]
 ; CHECK-NEXT:    fcvtzu x11, h1
-; CHECK-NEXT:    mov z2.h, z0.h[2]
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    mov z1.h, z1.h[2]
+; CHECK-NEXT:    mov z2.h, z0.h[1]
+; CHECK-NEXT:    stp x12, x8, [sp, #64]
 ; CHECK-NEXT:    fcvtzu x12, h3
-; CHECK-NEXT:    stp x13, x8, [sp]
-; CHECK-NEXT:    fcvtzu x8, h5
-; CHECK-NEXT:    stp x11, x9, [sp, #16]
-; CHECK-NEXT:    fcvtzu x9, h2
-; CHECK-NEXT:    mov z1.h, z0.h[1]
-; CHECK-NEXT:    mov z2.h, z0.h[3]
-; CHECK-NEXT:    fcvtzu x11, h0
+; CHECK-NEXT:    fcvtzu x8, h4
+; CHECK-NEXT:    stp x10, x9, [sp, #80]
+; CHECK-NEXT:    fcvtzu x9, h1
+; CHECK-NEXT:    mov z3.h, z0.h[3]
+; CHECK-NEXT:    fcvtzu x10, h0
 ; CHECK-NEXT:    mov z0.h, z0.h[2]
-; CHECK-NEXT:    stp x10, x12, [sp, #96]
-; CHECK-NEXT:    ldp q3, q4, [sp]
-; CHECK-NEXT:    fcvtzu x10, h1
-; CHECK-NEXT:    fcvtzu x12, h2
-; CHECK-NEXT:    stp x9, x8, [sp, #112]
+; CHECK-NEXT:    stp x11, x12, [sp, #32]
+; CHECK-NEXT:    fcvtzu x11, h2
+; CHECK-NEXT:    fcvtzu x12, h3
+; CHECK-NEXT:    stp x9, x8, [sp, #48]
 ; CHECK-NEXT:    fcvtzu x8, h0
-; CHECK-NEXT:    ldp q0, q1, [sp, #32]
-; CHECK-NEXT:    ldp q6, q7, [sp, #96]
-; CHECK-NEXT:    stp x11, x10, [sp, #64]
-; CHECK-NEXT:    stp x8, x12, [sp, #80]
-; CHECK-NEXT:    ldp q5, q2, [sp, #64]
-; CHECK-NEXT:    stp q0, q1, [x1]
-; CHECK-NEXT:    stp q3, q4, [x1, #32]
-; CHECK-NEXT:    stp q6, q7, [x1, #64]
-; CHECK-NEXT:    stp q5, q2, [x1, #96]
+; CHECK-NEXT:    ldp q0, q1, [sp]
+; CHECK-NEXT:    ldp q3, q4, [sp, #64]
+; CHECK-NEXT:    stp x10, x11, [sp, #96]
+; CHECK-NEXT:    ldp q6, q7, [sp, #32]
+; CHECK-NEXT:    stp x8, x12, [sp, #112]
+; CHECK-NEXT:    ldp q5, q2, [sp, #96]
+; CHECK-NEXT:    stp q0, q1, [x1, #32]
+; CHECK-NEXT:    stp q6, q7, [x1]
+; CHECK-NEXT:    stp q3, q4, [x1, #96]
+; CHECK-NEXT:    stp q5, q2, [x1, #64]
 ; CHECK-NEXT:    add sp, sp, #128
 ; CHECK-NEXT:    ret
 ;
@@ -2262,32 +2263,32 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) {
 define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v8f16_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    mov z1.h, z0.h[1]
-; CHECK-NEXT:    mov z2.h, z0.h[3]
-; CHECK-NEXT:    mov z3.h, z0.h[2]
-; CHECK-NEXT:    fcvtzs x8, h0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    fcvtzs x9, h1
-; CHECK-NEXT:    fcvtzs x10, h2
-; CHECK-NEXT:    fcvtzs x11, h3
-; CHECK-NEXT:    mov z1.h, z0.h[1]
-; CHECK-NEXT:    mov z2.h, z0.h[3]
+; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    fcvtzs x12, h0
-; CHECK-NEXT:    mov z0.h, z0.h[2]
-; CHECK-NEXT:    stp x8, x9, [sp, #32]
+; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
+; CHECK-NEXT:    mov z2.h, z1.h[1]
 ; CHECK-NEXT:    fcvtzs x8, h1
+; CHECK-NEXT:    mov z3.h, z1.h[3]
+; CHECK-NEXT:    mov z1.h, z1.h[2]
 ; CHECK-NEXT:    fcvtzs x9, h2
-; CHECK-NEXT:    stp x11, x10, [sp, #48]
+; CHECK-NEXT:    mov z2.h, z0.h[1]
+; CHECK-NEXT:    fcvtzs x10, h3
+; CHECK-NEXT:    mov z3.h, z0.h[3]
+; CHECK-NEXT:    fcvtzs x11, h1
+; CHECK-NEXT:    mov z0.h, z0.h[2]
+; CHECK-NEXT:    stp x8, x9, [sp, #-64]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    fcvtzs x8, h2
+; CHECK-NEXT:    fcvtzs x9, h3
+; CHECK-NEXT:    stp x11, x10, [sp, #16]
 ; CHECK-NEXT:    fcvtzs x10, h0
-; CHECK-NEXT:    ldp q2, q3, [sp, #32]
-; CHECK-NEXT:    stp x12, x8, [sp]
-; CHECK-NEXT:    stp x10, x9, [sp, #16]
-; CHECK-NEXT:    ldp q1, q0, [sp]
-; CHECK-NEXT:    stp q2, q3, [x1]
-; CHECK-NEXT:    stp q1, q0, [x1, #32]
+; CHECK-NEXT:    ldp q2, q3, [sp]
+; CHECK-NEXT:    stp x12, x8, [sp, #32]
+; CHECK-NEXT:    stp x10, x9, [sp, #48]
+; CHECK-NEXT:    ldp q1, q0, [sp, #32]
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q1, q0, [x1]
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
 ;
@@ -2341,55 +2342,56 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
 define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v16f16_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    mov z2.h, z1.h[1]
-; CHECK-NEXT:    mov z3.h, z1.h[3]
-; CHECK-NEXT:    mov z4.h, z1.h[2]
-; CHECK-NEXT:    fcvtzs x8, h1
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    mov z5.h, z0.h[3]
-; CHECK-NEXT:    fcvtzs x10, h0
-; CHECK-NEXT:    fcvtzs x9, h2
-; CHECK-NEXT:    fcvtzs x11, h3
-; CHECK-NEXT:    fcvtzs x12, h4
-; CHECK-NEXT:    mov z2.h, z1.h[1]
-; CHECK-NEXT:    mov z4.h, z1.h[3]
-; CHECK-NEXT:    fcvtzs x13, h1
-; CHECK-NEXT:    mov z1.h, z1.h[2]
-; CHECK-NEXT:    mov z3.h, z0.h[1]
-; CHECK-NEXT:    stp x8, x9, [sp, #32]
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    ext z2.b, z2.b, z1.b, #8
+; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT:    mov z4.h, z2.h[1]
 ; CHECK-NEXT:    fcvtzs x8, h2
+; CHECK-NEXT:    mov z5.h, z2.h[3]
+; CHECK-NEXT:    mov z2.h, z2.h[2]
+; CHECK-NEXT:    fcvtzs x12, h3
 ; CHECK-NEXT:    fcvtzs x9, h4
-; CHECK-NEXT:    stp x12, x11, [sp, #48]
+; CHECK-NEXT:    mov z4.h, z3.h[1]
+; CHECK-NEXT:    fcvtzs x10, h5
+; CHECK-NEXT:    mov z5.h, z3.h[3]
+; CHECK-NEXT:    fcvtzs x11, h2
+; CHECK-NEXT:    mov z2.h, z3.h[2]
+; CHECK-NEXT:    stp x8, x9, [sp, #-128]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    fcvtzs x8, h4
+; CHECK-NEXT:    fcvtzs x9, h5
+; CHECK-NEXT:    stp x11, x10, [sp, #16]
+; CHECK-NEXT:    fcvtzs x10, h2
+; CHECK-NEXT:    mov z3.h, z1.h[1]
+; CHECK-NEXT:    mov z4.h, z1.h[3]
 ; CHECK-NEXT:    fcvtzs x11, h1
-; CHECK-NEXT:    mov z2.h, z0.h[2]
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    mov z1.h, z1.h[2]
+; CHECK-NEXT:    mov z2.h, z0.h[1]
+; CHECK-NEXT:    stp x12, x8, [sp, #64]
 ; CHECK-NEXT:    fcvtzs x12, h3
-; CHECK-NEXT:    stp x13, x8, [sp]
-; CHECK-NEXT:    fcvtzs x8, h5
-; CHECK-NEXT:    stp x11, x9, [sp, #16]
-; CHECK-NEXT:    fcvtzs x9, h2
-; CHECK-NEXT:    mov z1.h, z0.h[1]
-; CHECK-NEXT:    mov z2.h, z0.h[3]
-; CHECK-NEXT:    fcvtzs x11, h0
+; CHECK-NEXT:    fcvtzs x8, h4
+; CHECK-NEXT:    stp x10, x9, [sp, #80]
+; CHECK-NEXT:    fcvtzs x9, h1
+; CHECK-NEXT:    mov z3.h, z0.h[3]
+; CHECK-NEXT:    fcvtzs x10, h0
 ; CHECK-NEXT:    mov z0.h, z0.h[2]
-; CHECK-NEXT:    stp x10, x12, [sp, #96]
-; CHECK-NEXT:    ldp q3, q4, [sp]
-; CHECK-NEXT:    fcvtzs x10, h1
-; CHECK-NEXT:    fcvtzs x12, h2
-; CHECK-NEXT:    stp x9, x8, [sp, #112]
+; CHECK-NEXT:    stp x11, x12, [sp, #32]
+; CHECK-NEXT:    fcvtzs x11, h2
+; CHECK-NEXT:    fcvtzs x12, h3
+; CHECK-NEXT:    stp x9, x8, [sp, #48]
 ; CHECK-NEXT:    fcvtzs x8, h0
-; CHECK-NEXT:    ldp q0, q1, [sp, #32]
-; CHECK-NEXT:    ldp q6, q7, [sp, #96]
-; CHECK-NEXT:    stp x11, x10, [sp, #64]
-; CHECK-NEXT:    stp x8, x12, [sp, #80]
-; CHECK-NEXT:    ldp q5, q2, [sp, #64]
-; CHECK-NEXT:    stp q0, q1, [x1]
-; CHECK-NEXT:    stp q3, q4, [x1, #32]
-; CHECK-NEXT:    stp q6, q7, [x1, #64]
-; CHECK-NEXT:    stp q5, q2, [x1, #96]
+; CHECK-NEXT:    ldp q0, q1, [sp]
+; CHECK-NEXT:    ldp q3, q4, [sp, #64]
+; CHECK-NEXT:    stp x10, x11, [sp, #96]
+; CHECK-NEXT:    ldp q6, q7, [sp, #32]
+; CHECK-NEXT:    stp x8, x12, [sp, #112]
+; CHECK-NEXT:    ldp q5, q2, [sp, #96]
+; CHECK-NEXT:    stp q0, q1, [x1, #32]
+; CHECK-NEXT:    stp q6, q7, [x1]
+; CHECK-NEXT:    stp q3, q4, [x1, #96]
+; CHECK-NEXT:    stp q5, q2, [x1, #64]
 ; CHECK-NEXT:    add sp, sp, #128
 ; CHECK-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index e09b1613a54af..233939f7285fa 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -127,19 +127,17 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    mov z2.d, z1.d
-; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    sunpklo z2.h, z1.b
+; CHECK-NEXT:    sunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ext z2.b, z2.b, z1.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z1.h, z1.b
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z2.h, z2.b
-; CHECK-NEXT:    sunpklo z3.h, z3.b
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z4.s, z2.h
 ; CHECK-NEXT:    sunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    sunpklo z1.h, z1.b
+; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z2.h
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
@@ -157,11 +155,11 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
 ; CHECK-NEXT:    splice z1.h, p0, z1.h, z2.h
-; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z3.h, p0, z3.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z1.b, z1.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
 ; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -244,31 +242,28 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: sdiv_v32i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q6, q2, [x0]
+; CHECK-NEXT:    ldp q6, q3, [x1]
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ldp q7, q3, [x1]
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    mov z16.d, z6.d
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ext z1.b, z1.b, z2.b, #8
-; CHECK-NEXT:    ext z16.b, z16.b, z6.b, #8
-; CHECK-NEXT:    sunpklo z6.h, z6.b
-; CHECK-NEXT:    ext z0.b, z0.b, z3.b, #8
+; CHECK-NEXT:    ldr q2, [x0, #16]
+; CHECK-NEXT:    sunpklo z1.h, z3.b
+; CHECK-NEXT:    sunpklo z4.h, z2.b
+; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
+; CHECK-NEXT:    sunpklo z7.h, z6.b
+; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT:    sunpklo z3.h, z3.b
-; CHECK-NEXT:    sunpklo z1.h, z1.b
-; CHECK-NEXT:    sunpklo z16.h, z16.b
-; CHECK-NEXT:    sunpklo z4.h, z0.b
-; CHECK-NEXT:    sunpklo z5.s, z1.h
+; CHECK-NEXT:    sunpklo z0.s, z1.h
+; CHECK-NEXT:    sunpklo z5.s, z4.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z18.s, z16.h
-; CHECK-NEXT:    sunpklo z0.s, z4.h
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
-; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
+; CHECK-NEXT:    sunpklo z17.s, z7.h
+; CHECK-NEXT:    ext z7.b, z7.b, z7.b, #8
+; CHECK-NEXT:    sunpklo z6.h, z6.b
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z4.s, z4.h
-; CHECK-NEXT:    sunpklo z16.s, z16.h
+; CHECK-NEXT:    sunpklo z7.s, z7.h
 ; CHECK-NEXT:    sdivr z0.s, p0/m, z0.s, z5.s
-; CHECK-NEXT:    sdiv z1.s, p0/m, z1.s, z4.s
+; CHECK-NEXT:    sdivr z1.s, p0/m, z1.s, z4.s
 ; CHECK-NEXT:    sunpklo z4.h, z2.b
 ; CHECK-NEXT:    sunpklo z2.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
@@ -278,44 +273,44 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sunpklo z4.s, z4.h
 ; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT:    mov z5.d, z7.d
-; CHECK-NEXT:    ext z5.b, z5.b, z7.b, #8
-; CHECK-NEXT:    sunpklo z7.h, z7.b
+; CHECK-NEXT:    ldr q5, [x0]
+; CHECK-NEXT:    sunpklo z16.h, z5.b
+; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    sunpklo z5.h, z5.b
-; CHECK-NEXT:    sunpklo z17.s, z5.h
+; CHECK-NEXT:    sunpklo z18.s, z16.h
+; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
+; CHECK-NEXT:    sunpklo z16.s, z16.h
+; CHECK-NEXT:    sdivr z17.s, p0/m, z17.s, z18.s
+; CHECK-NEXT:    sunpklo z18.s, z5.h
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
 ; CHECK-NEXT:    sunpklo z5.s, z5.h
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    sdivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT:    sunpklo z18.s, z6.h
+; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
+; CHECK-NEXT:    sunpklo z16.s, z6.h
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT:    sunpklo z6.s, z6.h
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z16.s
-; CHECK-NEXT:    sunpklo z16.s, z7.h
-; CHECK-NEXT:    ext z7.b, z7.b, z7.b, #8
-; CHECK-NEXT:    sunpklo z7.s, z7.h
-; CHECK-NEXT:    uzp1 z4.h, z17.h, z17.h
 ; CHECK-NEXT:    sdivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT:    sdiv z6.s, p0/m, z6.s, z7.s
+; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z7.h, z16.h, z16.h
+; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    uzp1 z4.h, z17.h, z17.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    splice z4.h, p0, z4.h, z5.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT:    uzp1 z7.h, z16.h, z16.h
-; CHECK-NEXT:    uzp1 z1.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    splice z4.h, p0, z4.h, z6.h
+; CHECK-NEXT:    splice z7.h, p0, z7.h, z5.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z7.h, p0, z7.h, z6.h
-; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    splice z2.b, p0, z2.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, z2.h, z3.h
 ; CHECK-NEXT:    uzp1 z3.b, z7.b, z7.b
-; CHECK-NEXT:    splice z3.b, p0, z3.b, z1.b
-; CHECK-NEXT:    stp q3, q2, [x0]
+; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    splice z1.b, p0, z1.b, z3.b
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    splice z0.b, p0, z0.b, z2.b
+; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v32i8:
@@ -542,20 +537,18 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    mov z2.d, z1.d
-; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    sunpklo z2.s, z1.h
+; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ext z2.b, z2.b, z1.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z2.s, z2.h
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
+; CHECK-NEXT:    sdivr z1.s, p0/m, z1.s, z0.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT:   ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/95242


More information about the llvm-commits mailing list