[llvm] 8f19de8 - [AArch64] Add big-endian tests for zext-to-tbl.ll
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 15 06:03:10 PDT 2022
Author: Florian Hahn
Date: 2022-09-15T14:01:27+01:00
New Revision: 8f19de848b968bfdd237bdb6ffb65e7412bb6a0c
URL: https://github.com/llvm/llvm-project/commit/8f19de848b968bfdd237bdb6ffb65e7412bb6a0c
DIFF: https://github.com/llvm/llvm-project/commit/8f19de848b968bfdd237bdb6ffb65e7412bb6a0c.diff
LOG: [AArch64] Add big-endian tests for zext-to-tbl.ll
Extra tests for D120571.
Added:
Modified:
llvm/test/CodeGen/AArch64/zext-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 52fd23783a00..0daf3714e3a6 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -o - %s | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios"
+; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
; It's profitable to convert the zext to a shuffle, which in turn will be
; lowered to 4 tbl instructions. The masks are materialized outside the loop.
@@ -26,6 +24,33 @@ define void @zext_v16i8_to_v16i32_in_loop(i8* %src, i32* %dst) {
; CHECK-NEXT: b.ne LBB0_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: .LBB0_1: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8
+; CHECK-BE-NEXT: add x10, x1, #32
+; CHECK-BE-NEXT: add x8, x8, #16
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #16
+; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
+; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
+; CHECK-BE-NEXT: add x1, x1, #64
+; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
+; CHECK-BE-NEXT: b.ne .LBB0_1
+; CHECK-BE-NEXT: // %bb.2: // %exit
+; CHECK-BE-NEXT: ret
entry:
br label %loop
@@ -74,6 +99,40 @@ define void @zext_v16i8_to_v16i32_in_loop_not_header(i8* %src, i32* %dst, i1 %c)
; CHECK-NEXT: b LBB1_1
; CHECK-NEXT: LBB1_4: ; %exit
; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_not_header:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: b .LBB1_2
+; CHECK-BE-NEXT: .LBB1_1: // %loop.latch
+; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-BE-NEXT: add x8, x8, #16
+; CHECK-BE-NEXT: add x1, x1, #64
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: b.eq .LBB1_4
+; CHECK-BE-NEXT: .LBB1_2: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: tbz w2, #0, .LBB1_1
+; CHECK-BE-NEXT: // %bb.3: // %then
+; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8
+; CHECK-BE-NEXT: add x10, x1, #32
+; CHECK-BE-NEXT: add x11, x1, #16
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
+; CHECK-BE-NEXT: st1 { v3.4s }, [x11]
+; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
+; CHECK-BE-NEXT: b .LBB1_1
+; CHECK-BE-NEXT: .LBB1_4: // %exit
+; CHECK-BE-NEXT: ret
entry:
br label %loop
@@ -115,6 +174,24 @@ define void @zext_v16i8_to_v16i32_no_loop(i8* %src, i32* %dst) {
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q0, q3, [x1]
; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: zext_v16i8_to_v16i32_no_loop:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x0]
+; CHECK-BE-NEXT: add x8, x1, #48
+; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x8]
+; CHECK-BE-NEXT: add x8, x1, #32
+; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
+; CHECK-BE-NEXT: add x8, x1, #16
+; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
+; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
+; CHECK-BE-NEXT: ret
entry:
%src.cast = bitcast i8* %src to <16 x i8>*
%load = load <16 x i8>, <16 x i8>* %src.cast
@@ -145,6 +222,33 @@ define void @zext_v16i8_to_v16i32_in_loop_optsize(i8* %src, i32* %dst) optsize {
; CHECK-NEXT: b.ne LBB3_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_optsize:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: .LBB3_1: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8
+; CHECK-BE-NEXT: add x10, x1, #32
+; CHECK-BE-NEXT: add x8, x8, #16
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #16
+; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
+; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
+; CHECK-BE-NEXT: add x1, x1, #64
+; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
+; CHECK-BE-NEXT: b.ne .LBB3_1
+; CHECK-BE-NEXT: // %bb.2: // %exit
+; CHECK-BE-NEXT: ret
entry:
br label %loop
@@ -186,6 +290,33 @@ define void @zext_v16i8_to_v16i32_in_loop_minsize(i8* %src, i32* %dst) minsize {
; CHECK-NEXT: b.ne LBB4_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_minsize:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: .LBB4_1: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8
+; CHECK-BE-NEXT: add x10, x1, #32
+; CHECK-BE-NEXT: add x8, x8, #16
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #16
+; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
+; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
+; CHECK-BE-NEXT: add x1, x1, #64
+; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
+; CHECK-BE-NEXT: b.ne .LBB4_1
+; CHECK-BE-NEXT: // %bb.2: // %exit
+; CHECK-BE-NEXT: ret
entry:
br label %loop
@@ -223,6 +354,25 @@ define void @zext_v16i8_to_v16i16_in_loop(i8* %src, i16* %dst) {
; CHECK-NEXT: b.ne LBB5_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: zext_v16i8_to_v16i16_in_loop:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: .LBB5_1: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8
+; CHECK-BE-NEXT: add x8, x8, #16
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #16
+; CHECK-BE-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-BE-NEXT: st1 { v1.8h }, [x1]
+; CHECK-BE-NEXT: add x1, x1, #32
+; CHECK-BE-NEXT: st1 { v0.8h }, [x9]
+; CHECK-BE-NEXT: b.ne .LBB5_1
+; CHECK-BE-NEXT: // %bb.2: // %exit
+; CHECK-BE-NEXT: ret
entry:
br label %loop
@@ -259,6 +409,26 @@ define void @zext_v8i8_to_v8i32_in_loop(i8* %src, i32* %dst) {
; CHECK-NEXT: b.ne LBB6_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: zext_v8i8_to_v8i32_in_loop:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: .LBB6_1: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8
+; CHECK-BE-NEXT: add x8, x8, #16
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #16
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-BE-NEXT: st1 { v1.4s }, [x1]
+; CHECK-BE-NEXT: add x1, x1, #64
+; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
+; CHECK-BE-NEXT: b.ne .LBB6_1
+; CHECK-BE-NEXT: // %bb.2: // %exit
+; CHECK-BE-NEXT: ret
entry:
br label %loop
@@ -309,6 +479,49 @@ define void @zext_v16i8_to_v16i64_in_loop(i8* %src, i64* %dst) {
; CHECK-NEXT: b.ne LBB7_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_loop:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: .LBB7_1: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8
+; CHECK-BE-NEXT: add x10, x1, #96
+; CHECK-BE-NEXT: add x8, x8, #16
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #112
+; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-BE-NEXT: st1 { v3.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #80
+; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0
+; CHECK-BE-NEXT: st1 { v2.2d }, [x10]
+; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-BE-NEXT: add x10, x1, #48
+; CHECK-BE-NEXT: st1 { v3.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #64
+; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0
+; CHECK-BE-NEXT: st1 { v1.2d }, [x9]
+; CHECK-BE-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-BE-NEXT: add x9, x1, #16
+; CHECK-BE-NEXT: st1 { v4.2d }, [x10]
+; CHECK-BE-NEXT: add x10, x1, #32
+; CHECK-BE-NEXT: st1 { v1.2d }, [x1]
+; CHECK-BE-NEXT: add x1, x1, #128
+; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-BE-NEXT: st1 { v0.2d }, [x9]
+; CHECK-BE-NEXT: st1 { v2.2d }, [x10]
+; CHECK-BE-NEXT: b.ne .LBB7_1
+; CHECK-BE-NEXT: // %bb.2: // %exit
+; CHECK-BE-NEXT: ret
entry:
br label %loop
More information about the llvm-commits mailing list