[llvm] af56c4a - [AArch64] Add an aarch64-enable-ext-to-tbl option. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 21 03:20:25 PDT 2023
Author: David Green
Date: 2023-09-21T11:20:19+01:00
New Revision: af56c4a4cbd70e186c4abd04f61ba6d2141b3942
URL: https://github.com/llvm/llvm-project/commit/af56c4a4cbd70e186c4abd04f61ba6d2141b3942
DIFF: https://github.com/llvm/llvm-project/commit/af56c4a4cbd70e186c4abd04f61ba6d2141b3942.diff
LOG: [AArch64] Add an aarch64-enable-ext-to-tbl option. NFC
This transform has caused a few issues with operations that can naturally be
extended. This patch just adds a debug option for disabling the transform,
useful for testing cases where it might not be profitable.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ad01a206c93fb39..c871d22c278be13 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -131,6 +131,10 @@ EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
"gather intrinsics"),
cl::init(true));
+static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
+ cl::desc("Combine ext and trunc to TBL"),
+ cl::init(true));
+
// All of the XOR, OR and CMP use ALU ports, and data dependency will become the
// bottleneck after this transform on high end CPU. So this max leaf node
// limitation is guard cmp+ccmp will be profitable.
@@ -14791,7 +14795,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
Instruction *I, Loop *L, const TargetTransformInfo &TTI) const {
// shuffle_vector instructions are serialized when targeting SVE,
// see LowerSPLAT_VECTOR. This peephole is not beneficial.
- if (Subtarget->useSVEForFixedLengthVectors())
+ if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
return false;
// Try to optimize conversions using tbl. This requires materializing constant
diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
index 9650a9b121654bc..ba367b0dbfde345 100644
--- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
+; RUN: llc -mtriple=aarch64_be-unknown-linux -aarch64-enable-ext-to-tbl=false -o - %s | FileCheck --check-prefix=CHECK-DISABLE %s
; CHECK-LABEL: lCPI0_0:
; CHECK-NEXT: .byte 0 ; 0x0
@@ -85,7 +86,30 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: b.eq .LBB0_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
+;
+; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_in_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: .LBB0_1: // %loop
+; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
+; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
+; CHECK-DISABLE-NEXT: add x10, x9, #16
+; CHECK-DISABLE-NEXT: add x11, x9, #48
+; CHECK-DISABLE-NEXT: add x9, x9, #32
+; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
+; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x11]
+; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
+; CHECK-DISABLE-NEXT: add x8, x8, #1
+; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
+; CHECK-DISABLE-NEXT: b.eq .LBB0_1
+; CHECK-DISABLE-NEXT: // %bb.2: // %exit
+; CHECK-DISABLE-NEXT: ret
entry:
br label %loop
@@ -131,6 +155,21 @@ define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-BE-NEXT: st1 { v0.16b }, [x1]
; CHECK-BE-NEXT: ret
+;
+; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_no_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: add x8, x0, #16
+; CHECK-DISABLE-NEXT: add x9, x0, #48
+; CHECK-DISABLE-NEXT: add x10, x0, #32
+; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x0]
+; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x8]
+; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x9]
+; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x10]
+; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x1]
+; CHECK-DISABLE-NEXT: ret
entry:
%l.A = load <16 x i32>, ptr %A
%trunc = trunc <16 x i32> %l.A to <16 x i8>
@@ -216,7 +255,25 @@ define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: b.eq .LBB2_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
+;
+; CHECK-DISABLE-LABEL: trunc_v8i32_to_v8i8_in_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: .LBB2_1: // %loop
+; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
+; CHECK-DISABLE-NEXT: add x10, x9, #16
+; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
+; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
+; CHECK-DISABLE-NEXT: add x8, x8, #1
+; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
+; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-DISABLE-NEXT: b.eq .LBB2_1
+; CHECK-DISABLE-NEXT: // %bb.2: // %exit
+; CHECK-DISABLE-NEXT: ret
entry:
br label %loop
@@ -330,8 +387,42 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: b.eq .LBB3_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
-
+;
+; CHECK-DISABLE-LABEL: trunc_v16i64_to_v16i8_in_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: .LBB3_1: // %loop
+; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #7
+; CHECK-DISABLE-NEXT: add x10, x9, #16
+; CHECK-DISABLE-NEXT: add x11, x9, #48
+; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
+; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
+; CHECK-DISABLE-NEXT: add x10, x9, #112
+; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
+; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x10]
+; CHECK-DISABLE-NEXT: add x10, x9, #96
+; CHECK-DISABLE-NEXT: add x11, x9, #32
+; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
+; CHECK-DISABLE-NEXT: add x10, x9, #80
+; CHECK-DISABLE-NEXT: add x9, x9, #64
+; CHECK-DISABLE-NEXT: ld1 { v5.2d }, [x11]
+; CHECK-DISABLE-NEXT: ld1 { v6.2d }, [x10]
+; CHECK-DISABLE-NEXT: ld1 { v7.2d }, [x9]
+; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
+; CHECK-DISABLE-NEXT: add x8, x8, #1
+; CHECK-DISABLE-NEXT: uzp1 v3.4s, v4.4s, v3.4s
+; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: uzp1 v4.4s, v7.4s, v6.4s
+; CHECK-DISABLE-NEXT: uzp1 v2.4s, v5.4s, v2.4s
+; CHECK-DISABLE-NEXT: uzp1 v1.8h, v4.8h, v3.8h
+; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
+; CHECK-DISABLE-NEXT: b.eq .LBB3_1
+; CHECK-DISABLE-NEXT: // %bb.2: // %exit
+; CHECK-DISABLE-NEXT: ret
entry:
br label %loop
@@ -431,8 +522,31 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: b.eq .LBB4_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
-
+;
+; CHECK-DISABLE-LABEL: trunc_v8i64_to_v8i8_in_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: .LBB4_1: // %loop
+; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
+; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
+; CHECK-DISABLE-NEXT: add x10, x9, #16
+; CHECK-DISABLE-NEXT: add x11, x9, #48
+; CHECK-DISABLE-NEXT: add x9, x9, #32
+; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
+; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
+; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
+; CHECK-DISABLE-NEXT: add x8, x8, #1
+; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-DISABLE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
+; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-DISABLE-NEXT: b.eq .LBB4_1
+; CHECK-DISABLE-NEXT: // %bb.2: // %exit
+; CHECK-DISABLE-NEXT: ret
entry:
br label %loop
@@ -529,7 +643,48 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: b.eq .LBB5_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
+;
+; CHECK-DISABLE-LABEL: trunc_v8i19_to_v8i8_in_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
+; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
+; CHECK-DISABLE-NEXT: ldrb w16, [x0, #18]
+; CHECK-DISABLE-NEXT: lsr x11, x9, #40
+; CHECK-DISABLE-NEXT: ubfx x12, x9, #33, #7
+; CHECK-DISABLE-NEXT: lsr x15, x10, #45
+; CHECK-DISABLE-NEXT: lsr x13, x10, #40
+; CHECK-DISABLE-NEXT: ubfx x14, x10, #26, #14
+; CHECK-DISABLE-NEXT: orr w11, w12, w11, lsl #7
+; CHECK-DISABLE-NEXT: ldrh w12, [x0, #16]
+; CHECK-DISABLE-NEXT: fmov s0, w15
+; CHECK-DISABLE-NEXT: orr w13, w14, w13, lsl #14
+; CHECK-DISABLE-NEXT: ubfx x14, x9, #14, #18
+; CHECK-DISABLE-NEXT: add x0, x0, #32
+; CHECK-DISABLE-NEXT: fmov s1, w11
+; CHECK-DISABLE-NEXT: orr w11, w16, w12, lsl #8
+; CHECK-DISABLE-NEXT: lsl x12, x9, #24
+; CHECK-DISABLE-NEXT: mov v0.s[1], w13
+; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25
+; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
+; CHECK-DISABLE-NEXT: orr w12, w11, w12
+; CHECK-DISABLE-NEXT: mov v1.s[1], w14
+; CHECK-DISABLE-NEXT: lsr w12, w12, #19
+; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20
+; CHECK-DISABLE-NEXT: mov v0.s[2], w13
+; CHECK-DISABLE-NEXT: mov v1.s[2], w12
+; CHECK-DISABLE-NEXT: mov v0.s[3], w9
+; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
+; CHECK-DISABLE-NEXT: add x8, x8, #1
+; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: mov v1.s[3], w11
+; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
+; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-DISABLE-NEXT: b.eq .LBB5_1
+; CHECK-DISABLE-NEXT: // %bb.2: // %exit
+; CHECK-DISABLE-NEXT: ret
entry:
br label %loop
@@ -610,7 +765,41 @@ define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: b.eq .LBB6_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
+;
+; CHECK-DISABLE-LABEL: trunc_v11i64_to_v11i8_in_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
+; CHECK-DISABLE-NEXT: .LBB6_1: // %loop
+; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DISABLE-NEXT: add x9, x0, #64
+; CHECK-DISABLE-NEXT: add x10, x0, #16
+; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x0]
+; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x0, #48
+; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
+; CHECK-DISABLE-NEXT: add x10, x0, #32
+; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x9]
+; CHECK-DISABLE-NEXT: ldr d5, [x0, #80]
+; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
+; CHECK-DISABLE-NEXT: add x9, x1, #10
+; CHECK-DISABLE-NEXT: subs x8, x8, #1
+; CHECK-DISABLE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
+; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v5.4s
+; CHECK-DISABLE-NEXT: add x0, x0, #128
+; CHECK-DISABLE-NEXT: uzp1 v2.4s, v4.4s, v2.4s
+; CHECK-DISABLE-NEXT: xtn v0.4h, v0.4s
+; CHECK-DISABLE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
+; CHECK-DISABLE-NEXT: uzp1 v1.16b, v1.16b, v0.16b
+; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
+; CHECK-DISABLE-NEXT: rev16 v2.16b, v1.16b
+; CHECK-DISABLE-NEXT: rev64 v1.16b, v1.16b
+; CHECK-DISABLE-NEXT: st1 { v0.b }[2], [x9]
+; CHECK-DISABLE-NEXT: add x9, x1, #8
+; CHECK-DISABLE-NEXT: st1 { v2.h }[4], [x9]
+; CHECK-DISABLE-NEXT: str d1, [x1], #16
+; CHECK-DISABLE-NEXT: b.eq .LBB6_1
+; CHECK-DISABLE-NEXT: // %bb.2: // %exit
+; CHECK-DISABLE-NEXT: ret
entry:
br label %loop
@@ -662,10 +851,24 @@ define void @trunc_v16i16_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: b.eq .LBB7_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
-
-
-
+;
+; CHECK-DISABLE-LABEL: trunc_v16i16_to_v16i8_in_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: .LBB7_1: // %loop
+; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
+; CHECK-DISABLE-NEXT: add x10, x9, #16
+; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
+; CHECK-DISABLE-NEXT: ld1 { v1.8h }, [x10]
+; CHECK-DISABLE-NEXT: add x8, x8, #1
+; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
+; CHECK-DISABLE-NEXT: b.eq .LBB7_1
+; CHECK-DISABLE-NEXT: // %bb.2: // %exit
+; CHECK-DISABLE-NEXT: ret
entry:
br label %loop
@@ -714,10 +917,22 @@ define void @trunc_v8i16_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: b.eq .LBB8_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
-
-
-
+;
+; CHECK-DISABLE-LABEL: trunc_v8i16_to_v8i8_in_loop:
+; CHECK-DISABLE: // %bb.0: // %entry
+; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: .LBB8_1: // %loop
+; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #4
+; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
+; CHECK-DISABLE-NEXT: add x8, x8, #1
+; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
+; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-DISABLE-NEXT: b.eq .LBB8_1
+; CHECK-DISABLE-NEXT: // %bb.2: // %exit
+; CHECK-DISABLE-NEXT: ret
entry:
br label %loop
More information about the llvm-commits mailing list