[llvm] c679fbe - [AArch64] Add tests for tbl + cmp splitting.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 25 09:59:51 PST 2022


Author: Florian Hahn
Date: 2022-02-25T17:59:44Z
New Revision: c679fbee2a76fce6ad4167c95c2e0276b0a8cc81

URL: https://github.com/llvm/llvm-project/commit/c679fbee2a76fce6ad4167c95c2e0276b0a8cc81
DIFF: https://github.com/llvm/llvm-project/commit/c679fbee2a76fce6ad4167c95c2e0276b0a8cc81.diff

LOG: [AArch64] Add tests for tbl + cmp splitting.

Additional tests showing potential for follow-ups after
D120571.

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/vselect-ext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll
index 79eb87c405751..3186df57ea55a 100644
--- a/llvm/test/CodeGen/AArch64/vselect-ext.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll
@@ -197,3 +197,206 @@ entry:
   %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
   ret <16 x i32> %sel
 }
+
+define void @extension_in_loop_v16i8_to_v16i32(i8* %src, i32* %dst) { ; loop test: load <16 x i8>, zext to <16 x i32>, zero the lanes failing the compare, store
+; CHECK-LABEL: extension_in_loop_v16i8_to_v16i32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    movi.2d v0, #0xffffffffffffffff
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:  LBB7_1: ; %loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr q1, [x0, x8]
+; CHECK-NEXT:    add x8, x8, #16
+; CHECK-NEXT:    cmp x8, #128
+; CHECK-NEXT:    cmgt.16b v2, v1, v0
+; CHECK-NEXT:    ushll2.8h v3, v1, #0
+; CHECK-NEXT:    sshll2.8h v4, v2, #0
+; CHECK-NEXT:    ushll2.4s v5, v3, #0
+; CHECK-NEXT:    ushll.4s v3, v3, #0
+; CHECK-NEXT:    sshll2.4s v6, v4, #0
+; CHECK-NEXT:    sshll.4s v4, v4, #0
+; CHECK-NEXT:    ushll.8h v1, v1, #0
+; CHECK-NEXT:    sshll.8h v2, v2, #0
+; CHECK-NEXT:    and.16b v5, v5, v6
+; CHECK-NEXT:    and.16b v3, v3, v4
+; CHECK-NEXT:    stp q3, q5, [x1, #32]
+; CHECK-NEXT:    sshll2.4s v4, v2, #0
+; CHECK-NEXT:    sshll.4s v2, v2, #0
+; CHECK-NEXT:    ushll2.4s v3, v1, #0
+; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    and.16b v3, v3, v4
+; CHECK-NEXT:    and.16b v1, v1, v2
+; CHECK-NEXT:    stp q1, q3, [x1], #64
+; CHECK-NEXT:    b.ne LBB7_1
+; CHECK-NEXT:  ; %bb.2: ; %exit
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable: starts at 0, advanced by 16 below; indexes both %src and %dst
+  %src.gep = getelementptr i8, i8* %src, i64 %iv
+  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
+  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
+  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; per-lane mask: signed byte greater than -1
+  %ext = zext <16 x i8> %load to <16 x i32>
+  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ; keep the widened lane where %cmp holds, otherwise 0
+  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
+  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
+  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
+  %iv.next = add nuw i64 %iv, 16
+  %ec = icmp eq i64 %iv.next, 128 ; leave the loop once %iv.next reaches 128
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(i8* %src, i32* %dst) { ; loop test: widen <16 x i8> to <16 x i32> via shufflevector + bitcast, zero the lanes failing the compare, store
+; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x9, lCPI8_0@PAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    adrp x10, lCPI8_1@PAGE
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    adrp x11, lCPI8_2@PAGE
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    adrp x12, lCPI8_3@PAGE
+; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    movi.2d v3, #0000000000000000
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    ldr q0, [x9, lCPI8_0@PAGEOFF]
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    ldr q2, [x10, lCPI8_1@PAGEOFF]
+; CHECK-NEXT:  Lloh6:
+; CHECK-NEXT:    ldr q5, [x11, lCPI8_2@PAGEOFF]
+; CHECK-NEXT:  Lloh7:
+; CHECK-NEXT:    ldr q6, [x12, lCPI8_3@PAGEOFF]
+; CHECK-NEXT:  LBB8_1: ; %loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr q4, [x0, x8]
+; CHECK-NEXT:    add x8, x8, #16
+; CHECK-NEXT:    cmp x8, #128
+; CHECK-NEXT:    cmgt.16b v7, v4, v1
+; CHECK-NEXT:    tbl.16b v16, { v3, v4 }, v0
+; CHECK-NEXT:    tbl.16b v17, { v3, v4 }, v2
+; CHECK-NEXT:    sshll2.8h v20, v7, #0
+; CHECK-NEXT:    tbl.16b v18, { v3, v4 }, v5
+; CHECK-NEXT:    sshll2.4s v21, v20, #0
+; CHECK-NEXT:    sshll.4s v20, v20, #0
+; CHECK-NEXT:    tbl.16b v19, { v3, v4 }, v6
+; CHECK-NEXT:    sshll.8h v7, v7, #0
+; CHECK-NEXT:    and.16b v16, v16, v21
+; CHECK-NEXT:    and.16b v17, v17, v20
+; CHECK-NEXT:    stp q17, q16, [x1, #32]
+; CHECK-NEXT:    sshll2.4s v16, v7, #0
+; CHECK-NEXT:    sshll.4s v7, v7, #0
+; CHECK-NEXT:    and.16b v16, v18, v16
+; CHECK-NEXT:    and.16b v7, v19, v7
+; CHECK-NEXT:    stp q7, q16, [x1], #64
+; CHECK-NEXT:    b.ne LBB8_1
+; CHECK-NEXT:  ; %bb.2: ; %exit
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdr Lloh3, Lloh7
+; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh6
+; CHECK-NEXT:    .loh AdrpLdr Lloh1, Lloh5
+; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh4
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable: starts at 0, advanced by 16 below; indexes both %src and %dst
+  %src.gep = getelementptr i8, i8* %src, i64 %iv
+  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
+  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
+  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; per-lane mask: signed byte greater than -1
+  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15> ; each 4-byte group is three copies of index 16 (a zero byte from the second operand) followed by lane k of %load
+  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32> ; reinterpret the 64 shuffled bytes as 16 i32 lanes
+  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ; keep the widened lane where %cmp holds, otherwise 0
+  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
+  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
+  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
+  %iv.next = add nuw i64 %iv, 16
+  %ec = icmp eq i64 %iv.next, 128 ; leave the loop once %iv.next reaches 128
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(i8* %src, i32* %dst) { ; loop test: same shape as a shuffle-based widening, but the mask's first element selects lane 1 of %load instead of a zero byte
+; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:  Lloh8:
+; CHECK-NEXT:    adrp x9, lCPI9_0@PAGE
+; CHECK-NEXT:  Lloh9:
+; CHECK-NEXT:    adrp x10, lCPI9_1@PAGE
+; CHECK-NEXT:  Lloh10:
+; CHECK-NEXT:    adrp x11, lCPI9_2@PAGE
+; CHECK-NEXT:  Lloh11:
+; CHECK-NEXT:    adrp x12, lCPI9_3@PAGE
+; CHECK-NEXT:    movi.2d v2, #0000000000000000
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    movi.2d v5, #0xffffffffffffffff
+; CHECK-NEXT:  Lloh12:
+; CHECK-NEXT:    ldr q0, [x9, lCPI9_0@PAGEOFF]
+; CHECK-NEXT:  Lloh13:
+; CHECK-NEXT:    ldr q4, [x10, lCPI9_1@PAGEOFF]
+; CHECK-NEXT:  Lloh14:
+; CHECK-NEXT:    ldr q6, [x11, lCPI9_2@PAGEOFF]
+; CHECK-NEXT:  Lloh15:
+; CHECK-NEXT:    ldr q7, [x12, lCPI9_3@PAGEOFF]
+; CHECK-NEXT:  LBB9_1: ; %loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr q1, [x0, x8]
+; CHECK-NEXT:    add x8, x8, #16
+; CHECK-NEXT:    cmp x8, #128
+; CHECK-NEXT:    cmgt.16b v16, v1, v5
+; CHECK-NEXT:    mov.16b v3, v1
+; CHECK-NEXT:    sshll2.8h v17, v16, #0
+; CHECK-NEXT:    tbl.16b v18, { v2, v3 }, v0
+; CHECK-NEXT:    tbl.16b v19, { v2, v3 }, v4
+; CHECK-NEXT:    sshll.8h v16, v16, #0
+; CHECK-NEXT:    tbl.16b v20, { v2, v3 }, v6
+; CHECK-NEXT:    tbl.16b v21, { v1, v2 }, v7
+; CHECK-NEXT:    sshll2.4s v22, v17, #0
+; CHECK-NEXT:    sshll.4s v17, v17, #0
+; CHECK-NEXT:    sshll2.4s v23, v16, #0
+; CHECK-NEXT:    sshll.4s v16, v16, #0
+; CHECK-NEXT:    and.16b v18, v18, v22
+; CHECK-NEXT:    and.16b v17, v19, v17
+; CHECK-NEXT:    stp q17, q18, [x1, #32]
+; CHECK-NEXT:    and.16b v17, v20, v23
+; CHECK-NEXT:    and.16b v16, v21, v16
+; CHECK-NEXT:    stp q16, q17, [x1], #64
+; CHECK-NEXT:    b.ne LBB9_1
+; CHECK-NEXT:  ; %bb.2: ; %exit
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdr Lloh11, Lloh15
+; CHECK-NEXT:    .loh AdrpLdr Lloh10, Lloh14
+; CHECK-NEXT:    .loh AdrpLdr Lloh9, Lloh13
+; CHECK-NEXT:    .loh AdrpLdr Lloh8, Lloh12
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable: starts at 0, advanced by 16 below; indexes both %src and %dst
+  %src.gep = getelementptr i8, i8* %src, i64 %iv
+  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
+  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
+  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; per-lane mask: signed byte greater than -1
+  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 1, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15> ; mask starts with index 1 (a lane of %load) rather than 16, so the first 4-byte group is not zero-padded like the others
+  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32> ; reinterpret the 64 shuffled bytes as 16 i32 lanes
+  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer ; keep the shuffled lane where %cmp holds, otherwise 0
+  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
+  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
+  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
+  %iv.next = add nuw i64 %iv, 16
+  %ec = icmp eq i64 %iv.next, 128 ; leave the loop once %iv.next reaches 128
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list