[llvm] 39fcb4a - [AArch64] Add tests for lowering trunc to i8 using tbl.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 8 07:45:46 PDT 2022
Author: Florian Hahn
Date: 2022-09-08T15:45:32+01:00
New Revision: 39fcb4a2684ab1ded9ef62d618c1f26c5db30fd2
URL: https://github.com/llvm/llvm-project/commit/39fcb4a2684ab1ded9ef62d618c1f26c5db30fd2
DIFF: https://github.com/llvm/llvm-project/commit/39fcb4a2684ab1ded9ef62d618c1f26c5db30fd2.diff
LOG: [AArch64] Add tests for lowering trunc to i8 using tbl.
Added:
llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
new file mode 100644
index 000000000000..672a0f16640e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+; It's profitable to use a single tbl.4 instruction to lower the truncate in the loop. NOTE(review): the checks below currently expect uzp1, and the loop repeats only while %iv.next == 1000; confirm both are intended.
+define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
+; CHECK-LABEL: trunc_v16i32_to_v16i8_in_loop:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: LBB0_1: ; %loop
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x9, x0, x8, lsl #6
+; CHECK-NEXT: ldp q1, q0, [x9, #32]
+; CHECK-NEXT: ldp q3, q2, [x9]
+; CHECK-NEXT: uzp1.8h v0, v1, v0
+; CHECK-NEXT: uzp1.8h v1, v3, v2
+; CHECK-NEXT: uzp1.16b v0, v1, v0
+; CHECK-NEXT: str q0, [x1, x8, lsl #4]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: b.eq LBB0_1
+; CHECK-NEXT: ; %bb.2: ; %exit
+; CHECK-NEXT: ret
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr inbounds <16 x i32>, ptr %A, i64 %iv
+ %l.A = load <16 x i32>, ptr %gep.A
+ %trunc = trunc <16 x i32> %l.A to <16 x i8>
+ %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
+ store <16 x i8> %trunc, ptr %gep.dst
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Straight-line case (no loop): not profitable to use tbl, as materializing the
+; masks requires more instructions. The checks below expect the uzp1 sequence.
+define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
+; CHECK-LABEL: trunc_v16i32_to_v16i8_no_loop:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: uzp1.8h v0, v1, v0
+; CHECK-NEXT: uzp1.8h v1, v3, v2
+; CHECK-NEXT: uzp1.16b v0, v1, v0
+; CHECK-NEXT: str q0, [x1]
+; CHECK-NEXT: ret
+entry:
+ %l.A = load <16 x i32>, ptr %A
+ %trunc = trunc <16 x i32> %l.A to <16 x i8>
+ store <16 x i8> %trunc, ptr %dst
+ ret void
+}
+
+; It's profitable to use a single tbl.2 instruction to lower the truncate in the loop. NOTE(review): the checks below currently expect uzp1 + xtn, and the loop repeats only while %iv.next == 1000; confirm both are intended.
+define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
+; CHECK-LABEL: trunc_v8i32_to_v8i8_in_loop:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: LBB2_1: ; %loop
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x9, x0, x8, lsl #5
+; CHECK-NEXT: ldp q1, q0, [x9]
+; CHECK-NEXT: uzp1.8h v0, v1, v0
+; CHECK-NEXT: xtn.8b v0, v0
+; CHECK-NEXT: str d0, [x1, x8, lsl #3]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: b.eq LBB2_1
+; CHECK-NEXT: ; %bb.2: ; %exit
+; CHECK-NEXT: ret
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr inbounds <8 x i32>, ptr %A, i64 %iv
+ %l.A = load <8 x i32>, ptr %gep.A
+ %trunc = trunc <8 x i32> %l.A to <8 x i8>
+ %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
+ store <8 x i8> %trunc, ptr %gep.dst
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list