[llvm] [CodeGen][AArch64] Set min jump table entries to 13 for AArch64 targets (PR #71166)

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 14 03:41:46 PST 2023


https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/71166

>From d87168dfc107478878e72422164c472858718f47 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Fri, 3 Nov 2023 10:06:09 +0000
Subject: [PATCH 1/5] [CodeGen][AArch64] Set min jump table entries to 13 for
 AArch64 targets

There are some workloads that are negatively impacted by using jump
tables when the number of entries is small. The SPEC2017 perlbench
benchmark is one example of this, where increasing the threshold to
around 13 gives a ~1.5% improvement on neoverse-v1. I chose the
minimum threshold based on empirical evidence rather than science,
and just manually increased the threshold until I got the best
performance without impacting other workloads. For neoverse-v1 I
saw around ~0.2% improvement in the SPEC2017 integer geomean, and
no overall change for neoverse-n1. If we find issues with this
threshold later on we can always revisit this.

The most significant SPEC2017 score changes on neoverse-v1 were:

500.perlbench_r: +1.6%
520.omnetpp_r: +0.6%

and the rest saw changes < 0.5%.

I updated CodeGen/AArch64/min-jump-table.ll to reflect the new
threshold. For most of the affected tests I manually set the min
number of entries back to 4 on the RUN line because the tests seem
to rely upon this behaviour.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  6 ++
 .../GlobalISel/arm64-irtranslator-switch.ll   |  2 +-
 llvm/test/CodeGen/AArch64/arm64-jumptable.ll  |  4 +-
 .../CodeGen/AArch64/bti-branch-relaxation.ll  |  2 +-
 .../implicit-def-subreg-to-reg-regression.ll  |  2 +-
 llvm/test/CodeGen/AArch64/jump-table-32.ll    |  2 +-
 .../test/CodeGen/AArch64/jump-table-exynos.ll |  4 +-
 llvm/test/CodeGen/AArch64/jump-table.ll       | 12 +--
 llvm/test/CodeGen/AArch64/max-jump-table.ll   | 10 +-
 llvm/test/CodeGen/AArch64/min-jump-table.ll   | 96 ++++++++++++++++++-
 .../AArch64/patchable-function-entry-bti.ll   |  2 +-
 .../AArch64/redundant-mov-from-zero-extend.ll | 41 ++++----
 .../AArch64/switch-unreachable-default.ll     |  2 +-
 llvm/test/CodeGen/AArch64/win64-jumptable.ll  |  4 +-
 llvm/test/CodeGen/AArch64/wineh-bti.ll        |  2 +-
 .../Generic/machine-function-splitter.ll      |  8 +-
 llvm/test/DebugInfo/COFF/jump-table.ll        |  2 +-
 17 files changed, 150 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 291f0c8c5d991c6..002b95c68272f5c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -144,6 +144,10 @@ static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
                                  cl::desc("Maximum of xors"));
 
+static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
+    "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
+    cl::desc("Set minimum number of entries to use a jump table on AArch64"));
+
 /// Value type used for condition codes.
 static const MVT MVT_CC = MVT::i32;
 
@@ -1653,6 +1657,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 
   IsStrictFPEnabled = true;
+
+  setMinimumJumpTableEntries(AArch64MinimumJumpTableEntries);
 }
 
 void AArch64TargetLowering::addTypeForNEON(MVT VT) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll
index 4e4297b7a5e22f8..9371edd439fe49c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple aarch64 -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -simplify-mir -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+; RUN: llc -global-isel -mtriple aarch64 -aarch64-min-jump-table-entries=4 -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -simplify-mir -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
 
 define i32 @switch(i32 %argc) {
   ; CHECK-LABEL: name: switch
diff --git a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
index d4ac9e72b28ff62..7d9adf92a6a95c5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX
+; RUN: llc -mtriple=arm64-apple-ios -aarch64-min-jump-table-entries=4 < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -aarch64-min-jump-table-entries=4 < %s | FileCheck %s --check-prefix=CHECK-LINUX
 ; <rdar://11417675>
 
 define void @sum(i32 %a, ptr %to, i32 %c) {
diff --git a/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll b/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll
index d9ed54fb86bf151..e3e7f42526f785c 100644
--- a/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll
+++ b/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - | FileCheck %s
+; RUN: llc %s -aarch64-min-jump-table-entries=4 -o - | FileCheck %s
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64"
 
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 73b180dc4ae765f..0f208f8ed905241 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
 
 ; Check there's no assert in spilling from implicit-def operands on an
 ; IMPLICIT_DEF.
diff --git a/llvm/test/CodeGen/AArch64/jump-table-32.ll b/llvm/test/CodeGen/AArch64/jump-table-32.ll
index d8572e901af29ec..e65813de8943e3b 100644
--- a/llvm/test/CodeGen/AArch64/jump-table-32.ll
+++ b/llvm/test/CodeGen/AArch64/jump-table-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64_32-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=arm64_32-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
 
 define i32 @test_jumptable(i32 %in) {
 ; CHECK: test_jumptable
diff --git a/llvm/test/CodeGen/AArch64/jump-table-exynos.ll b/llvm/test/CodeGen/AArch64/jump-table-exynos.ll
index b5b400ecfbffcac..61b0df5de2af3a4 100644
--- a/llvm/test/CodeGen/AArch64/jump-table-exynos.ll
+++ b/llvm/test/CodeGen/AArch64/jump-table-exynos.ll
@@ -1,5 +1,5 @@
-; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mattr=+force-32bit-jump-tables -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
-; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mattr=+force-32bit-jump-tables -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
 
 ; Exynos doesn't want jump tables to be compressed for now.
 
diff --git a/llvm/test/CodeGen/AArch64/jump-table.ll b/llvm/test/CodeGen/AArch64/jump-table.ll
index 6c32444657542db..bb6c74b3b9df6c7 100644
--- a/llvm/test/CodeGen/AArch64/jump-table.ll
+++ b/llvm/test/CodeGen/AArch64/jump-table.ll
@@ -1,9 +1,9 @@
-; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
-; RUN: llc -no-integrated-as -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -no-integrated-as -code-model=large -relocation-model=pic -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-PIC %s
-; RUN: llc -no-integrated-as -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
-; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-IOS %s
-; RUN: llc -no-integrated-as -code-model=tiny -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-TINY %s
+; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -no-integrated-as -code-model=large -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -no-integrated-as -code-model=large -relocation-model=pic -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-PIC %s
+; RUN: llc -no-integrated-as -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
+; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-IOS %s
+; RUN: llc -no-integrated-as -code-model=tiny -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-TINY %s
 
 define i32 @test_jumptable(i32 %in) {
 ; CHECK: test_jumptable
diff --git a/llvm/test/CodeGen/AArch64/max-jump-table.ll b/llvm/test/CodeGen/AArch64/max-jump-table.ll
index d01924a9a54272e..1268f5e6167ad8d 100644
--- a/llvm/test/CodeGen/AArch64/max-jump-table.ll
+++ b/llvm/test/CodeGen/AArch64/max-jump-table.ll
@@ -1,8 +1,8 @@
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40                         -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0  < %t
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=4  -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=8  -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=16 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK16 < %t
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3         -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40                         -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=4  -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=8  -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=16 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK16 < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -mcpu=exynos-m3         -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t
 
 declare void @ext(i32, i32)
 
diff --git a/llvm/test/CodeGen/AArch64/min-jump-table.ll b/llvm/test/CodeGen/AArch64/min-jump-table.ll
index bf6bddf7b9c4f73..3b3f79746de0ab7 100644
--- a/llvm/test/CodeGen/AArch64/min-jump-table.ll
+++ b/llvm/test/CodeGen/AArch64/min-jump-table.ll
@@ -1,7 +1,9 @@
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=0 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0  < %t
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=2 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK2  < %t
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
-; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=0 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=2 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK2  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=12 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK12  < %t
+; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT  < %t
 
 declare void @ext(i32, i32)
 
@@ -16,6 +18,8 @@ entry:
 ; CHECK2-NEXT: Jump Tables:
 ; CHECK4-NOT: {{^}}Jump Tables:
 ; CHECK8-NOT: {{^}}Jump Tables:
+; CHECK12-NOT: {{^}}Jump Tables:
+; CHECK-DEFAULT-NOT: {{^}}Jump Tables:
 
 bb1: tail call void @ext(i32 1, i32 0) br label %return
 bb2: tail call void @ext(i32 2, i32 2) br label %return
@@ -36,6 +40,8 @@ entry:
 ; CHECK2-NEXT: Jump Tables:
 ; CHECK4-NEXT: Jump Tables:
 ; CHECK8-NOT: {{^}}Jump Tables:
+; CHECK12-NOT: {{^}}Jump Tables:
+; CHECK-DEFAULT-NOT: {{^}}Jump Tables:
 
 bb1: tail call void @ext(i32 1, i32 0) br label %return
 bb2: tail call void @ext(i32 3, i32 2) br label %return
@@ -58,6 +64,83 @@ entry:
     i32 9, label %bb8
   ]
 ; CHECK-LABEL: function jt8:
+; CHECK0-NEXT: Jump Tables:
+; CHECK2-NEXT: Jump Tables:
+; CHECK4-NEXT: Jump Tables:
+; CHECK8-NEXT: Jump Tables:
+; CHECK12-NOT: Jump Tables:
+; CHECK-DEFAULT-NOT: {{^}}Jump Tables:
+
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
+bb3: tail call void @ext(i32 3, i32 4) br label %return
+bb4: tail call void @ext(i32 4, i32 6) br label %return
+bb5: tail call void @ext(i32 5, i32 8) br label %return
+bb6: tail call void @ext(i32 6, i32 10) br label %return
+bb7: tail call void @ext(i32 7, i32 12) br label %return
+bb8: tail call void @ext(i32 8, i32 14) br label %return
+
+return: ret i32 %b
+}
+
+define i32 @jt12(i32 %a, i32 %b) {
+entry:
+  switch i32 %a, label %return [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+    i32 9, label %bb9
+    i32 10, label %bb10
+    i32 11, label %bb11
+    i32 12, label %bb12
+  ]
+; CHECK-LABEL: function jt12:
+; CHECK0-NEXT: Jump Tables:
+; CHECK2-NEXT: Jump Tables:
+; CHECK4-NEXT: Jump Tables:
+; CHECK8-NEXT: Jump Tables:
+; CHECK12-NEXT: Jump Tables:
+; CHECK-DEFAULT-NOT: {{^}}Jump Tables:
+
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
+bb3: tail call void @ext(i32 3, i32 4) br label %return
+bb4: tail call void @ext(i32 4, i32 6) br label %return
+bb5: tail call void @ext(i32 5, i32 8) br label %return
+bb6: tail call void @ext(i32 6, i32 10) br label %return
+bb7: tail call void @ext(i32 7, i32 12) br label %return
+bb8: tail call void @ext(i32 8, i32 14) br label %return
+bb9: tail call void @ext(i32 9, i32 16) br label %return
+bb10: tail call void @ext(i32 10, i32 18) br label %return
+bb11: tail call void @ext(i32 11, i32 20) br label %return
+bb12: tail call void @ext(i32 12, i32 22) br label %return
+
+return: ret i32 %b
+}
+
+define i32 @jt13(i32 %a, i32 %b) {
+entry:
+  switch i32 %a, label %return [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+    i32 9, label %bb9
+    i32 10, label %bb10
+    i32 11, label %bb11
+    i32 12, label %bb12
+    i32 13, label %bb13
+  ]
+; CHECK-LABEL: function jt13:
 ; CHECK-NEXT: Jump Tables:
 
 bb1: tail call void @ext(i32 1, i32 0) br label %return
@@ -68,6 +151,11 @@ bb5: tail call void @ext(i32 5, i32 8) br label %return
 bb6: tail call void @ext(i32 6, i32 10) br label %return
 bb7: tail call void @ext(i32 7, i32 12) br label %return
 bb8: tail call void @ext(i32 8, i32 14) br label %return
+bb9: tail call void @ext(i32 9, i32 16) br label %return
+bb10: tail call void @ext(i32 10, i32 18) br label %return
+bb11: tail call void @ext(i32 11, i32 20) br label %return
+bb12: tail call void @ext(i32 12, i32 22) br label %return
+bb13: tail call void @ext(i32 13, i32 24) br label %return
 
 return: ret i32 %b
 }
diff --git a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll
index 15657730c2cdcb6..106f6bb856b6b8a 100644
--- a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll
+++ b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -aarch64-min-jump-table-entries=4 %s -o - | FileCheck %s
 
 define void @f0() "patchable-function-entry"="0" "branch-target-enforcement"="true" {
 ; CHECK-LABEL: f0:
diff --git a/llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll b/llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll
index c150cb889313ac9..caa28b31a6f6539 100644
--- a/llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll
+++ b/llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll
@@ -11,33 +11,37 @@ define i32 @test(i32 %input, i32 %n, i32 %a) {
 ; CHECK-NEXT:  .LBB0_2: // %bb.0
 ; CHECK-NEXT:    add w8, w0, w1
 ; CHECK-NEXT:    mov w0, #100 // =0x64
-; CHECK-NEXT:    cmp w8, #4
-; CHECK-NEXT:    b.hi .LBB0_5
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    b.le .LBB0_7
 ; CHECK-NEXT:  // %bb.3: // %bb.0
-; CHECK-NEXT:    adrp x9, .LJTI0_0
-; CHECK-NEXT:    add x9, x9, :lo12:.LJTI0_0
-; CHECK-NEXT:    adr x10, .LBB0_4
-; CHECK-NEXT:    ldrb w11, [x9, x8]
-; CHECK-NEXT:    add x10, x10, x11, lsl #2
-; CHECK-NEXT:    br x10
-; CHECK-NEXT:  .LBB0_4: // %sw.bb
-; CHECK-NEXT:    add w0, w2, #1
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_5: // %bb.0
+; CHECK-NEXT:    cmp w8, #2
+; CHECK-NEXT:    b.eq .LBB0_10
+; CHECK-NEXT:  // %bb.4: // %bb.0
+; CHECK-NEXT:    cmp w8, #4
+; CHECK-NEXT:    b.eq .LBB0_11
+; CHECK-NEXT:  // %bb.5: // %bb.0
 ; CHECK-NEXT:    cmp w8, #200
-; CHECK-NEXT:    b.ne .LBB0_9
+; CHECK-NEXT:    b.ne .LBB0_12
 ; CHECK-NEXT:  // %bb.6: // %sw.bb7
 ; CHECK-NEXT:    add w0, w2, #7
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_7: // %sw.bb3
+; CHECK-NEXT:  .LBB0_7: // %bb.0
+; CHECK-NEXT:    cbz w8, .LBB0_13
+; CHECK-NEXT:  // %bb.8: // %bb.0
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    b.ne .LBB0_12
+; CHECK-NEXT:  // %bb.9: // %sw.bb1
+; CHECK-NEXT:    add w0, w2, #3
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_10: // %sw.bb3
 ; CHECK-NEXT:    add w0, w2, #4
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_8: // %sw.bb5
+; CHECK-NEXT:  .LBB0_11: // %sw.bb5
 ; CHECK-NEXT:    add w0, w2, #5
-; CHECK-NEXT:  .LBB0_9: // %return
+; CHECK-NEXT:  .LBB0_12: // %return
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_10: // %sw.bb1
-; CHECK-NEXT:    add w0, w2, #3
+; CHECK-NEXT:  .LBB0_13: // %sw.bb
+; CHECK-NEXT:    add w0, w2, #1
 ; CHECK-NEXT:    ret
 entry:
   %b = add nsw i32 %input, %n
@@ -77,3 +81,4 @@ return:
   %retval.0 = phi i32 [ %add8, %sw.bb7 ], [ %add6, %sw.bb5 ], [ %add4, %sw.bb3 ], [ %add2, %sw.bb1 ], [ %add, %sw.bb ], [ 100, %bb.0 ], [ 0, %entry ]
   ret i32 %retval.0
 }
+
diff --git a/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll b/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll
index 9acc5a150b630f2..949485bf52f2e8d 100644
--- a/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll
+++ b/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -o - %s | FileCheck %s
+; RUN: llc -O3 -aarch64-min-jump-table-entries=4 -o - %s | FileCheck %s
 
 ; Test that the output in the presence of an unreachable default does not have
 ; a compare and branch at the top of the switch to handle the default case.
diff --git a/llvm/test/CodeGen/AArch64/win64-jumptable.ll b/llvm/test/CodeGen/AArch64/win64-jumptable.ll
index 0b9b7deceae1138..f9f2b0bf0ca5cf5 100644
--- a/llvm/test/CodeGen/AArch64/win64-jumptable.ll
+++ b/llvm/test/CodeGen/AArch64/win64-jumptable.ll
@@ -1,5 +1,5 @@
-; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-enable-compress-jump-tables=0 | FileCheck %s
-; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-enable-compress-jump-tables=0 -filetype=obj | llvm-readobj --unwind - | FileCheck %s -check-prefix=UNWIND
+; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 -aarch64-enable-compress-jump-tables=0 | FileCheck %s
+; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 -aarch64-enable-compress-jump-tables=0 -filetype=obj | llvm-readobj --unwind - | FileCheck %s -check-prefix=UNWIND
 
 define void @f(i32 %x) {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/wineh-bti.ll b/llvm/test/CodeGen/AArch64/wineh-bti.ll
index edf3699d52fd2e4..a73f4d219bc3144 100644
--- a/llvm/test/CodeGen/AArch64/wineh-bti.ll
+++ b/llvm/test/CodeGen/AArch64/wineh-bti.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-windows | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 | FileCheck %s
 
 define dso_local i32 @func(i32 %in) {
 entry:
diff --git a/llvm/test/CodeGen/Generic/machine-function-splitter.ll b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
index 364eadd64c7556d..a236f201eaafc55 100644
--- a/llvm/test/CodeGen/Generic/machine-function-splitter.ll
+++ b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
@@ -8,10 +8,10 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86
 ; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86
 
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64
 ; RUN: llc < %s -mtriple=aarch64 -enable-split-machine-functions -aarch64-redzone | FileCheck %s -check-prefixes=MFS-REDZONE-AARCH64
 
 ; COM: Machine function splitting with AFDO profiles
diff --git a/llvm/test/DebugInfo/COFF/jump-table.ll b/llvm/test/DebugInfo/COFF/jump-table.ll
index a1f16f48962f999..4d16c78c97882ab 100644
--- a/llvm/test/DebugInfo/COFF/jump-table.ll
+++ b/llvm/test/DebugInfo/COFF/jump-table.ll
@@ -3,7 +3,7 @@
 ; REQUIRES: x86-registered-target
 ; RUN: llc -mtriple=i686-windows < %s | FileCheck %s --check-prefixes=CHECK,I686,NOTA32
 ; RUN: llc -mtriple=x86_64-windows < %s | FileCheck %s --check-prefixes=CHECK,X64,NOTA32
-; RUN: llc -mtriple=aarch64-windows < %s | FileCheck %s --check-prefixes=CHECK,A64,NOTA32
+; RUN: llc -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 < %s | FileCheck %s --check-prefixes=CHECK,A64,NOTA32
 ; RUN: llc -mtriple=thumbv7a-windows < %s | FileCheck %s --check-prefixes=CHECK,A32
 ; RUN: llc -mtriple=x86_64-windows -filetype=obj < %s | llvm-readobj - --codeview | FileCheck %s --check-prefixes=CV
 

>From bb2a9b78e459dc41a26c1847dc0fed238782bfd3 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Fri, 10 Nov 2023 14:32:55 +0000
Subject: [PATCH 2/5] Take minsize attribute into account

I've changed getMinimumJumpTableEntries to now take a Function
parameter so that we can query the minsize attribute and only
increase the threshold if minsize is absent.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  3 +-
 llvm/include/llvm/CodeGen/TargetLowering.h    |  2 +-
 llvm/lib/CodeGen/SwitchLoweringUtils.cpp      |  5 ++-
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  3 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    | 11 ++++-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  2 +
 llvm/lib/Target/VE/VEISelLowering.cpp         |  4 +-
 llvm/lib/Target/VE/VEISelLowering.h           |  2 +-
 llvm/test/CodeGen/AArch64/min-jump-table.ll   | 40 +++++++++++++++++++
 9 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 7a8f36da58ceccb..7e91f263c97ed18 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -470,7 +470,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
     // Check if suitable for a jump table.
     if (IsJTAllowed) {
-      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
+      if (N < 2 ||
+          N < TLI->getMinimumJumpTableEntries(SI.getParent()->getParent()))
         return N;
       uint64_t Range =
           (MaxCaseVal - MinCaseVal)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c87537291e3b161..48927f5cbf78348 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1904,7 +1904,7 @@ class TargetLoweringBase {
   virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
 
   /// Return lower limit for number of blocks in a jump table.
-  virtual unsigned getMinimumJumpTableEntries() const;
+  virtual unsigned getMinimumJumpTableEntries(const Function *F) const;
 
   /// Return lower limit of the density in a jump table.
   unsigned getMinimumJumpTableDensity(bool OptForSize) const;
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index b01a8bed0a394b6..165a9ec5af357c3 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -58,10 +58,11 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
 #endif
 
   assert(TLI && "TLI not set!");
-  if (!TLI->areJTsAllowed(SI->getParent()->getParent()))
+  const Function *F = SI->getParent()->getParent();
+  if (!TLI->areJTsAllowed(F))
     return;
 
-  const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries();
+  const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(F);
   const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
 
   // Bail if not enough cases.
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 722cefb1eddb3c5..31f48a2ef477131 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2035,7 +2035,8 @@ Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
   return nullptr;
 }
 
-unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
+unsigned
+TargetLoweringBase::getMinimumJumpTableEntries(const Function *F) const {
   return MinimumJumpTableEntries;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 002b95c68272f5c..f3f78dc064490e7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1657,8 +1657,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 
   IsStrictFPEnabled = true;
-
-  setMinimumJumpTableEntries(AArch64MinimumJumpTableEntries);
 }
 
 void AArch64TargetLowering::addTypeForNEON(MVT VT) {
@@ -26550,3 +26548,12 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
   }
   return true;
 }
+
+unsigned
+AArch64TargetLowering::getMinimumJumpTableEntries(const Function *F) const {
+  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 ||
+      !F->hasMinSize())
+    return AArch64MinimumJumpTableEntries;
+
+  return TargetLoweringBase::getMinimumJumpTableEntries(F);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 7332a95615a4da5..efcb2a7b11a203d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -853,6 +853,8 @@ class AArch64TargetLowering : public TargetLowering {
   bool isComplexDeinterleavingOperationSupported(
       ComplexDeinterleavingOperation Operation, Type *Ty) const override;
 
+  unsigned getMinimumJumpTableEntries(const Function *F) const override;
+
   Value *createComplexDeinterleavingIR(
       IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
       ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 0267aefd1e914e6..c2b1ae029441802 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -3085,12 +3085,12 @@ VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 // VE Target Optimization Support
 //===----------------------------------------------------------------------===//
 
-unsigned VETargetLowering::getMinimumJumpTableEntries() const {
+unsigned VETargetLowering::getMinimumJumpTableEntries(const Function *F) const {
   // Specify 8 for PIC model to relieve the impact of PIC load instructions.
   if (isJumpTableRelative())
     return 8;
 
-  return TargetLowering::getMinimumJumpTableEntries();
+  return TargetLowering::getMinimumJumpTableEntries(F);
 }
 
 bool VETargetLowering::hasAndNot(SDValue Y) const {
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 8b9412d786625d1..02f6cff782c476c 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -326,7 +326,7 @@ class VETargetLowering : public TargetLowering {
   /// Target Optimization {
 
   // Return lower limit for number of blocks in a jump table.
-  unsigned getMinimumJumpTableEntries() const override;
+  unsigned getMinimumJumpTableEntries(const Function *F) const override;
 
   // SX-Aurora VE's s/udiv is 5-9 times slower than multiply.
   bool isIntDivCheap(EVT, AttributeList) const override { return false; }
diff --git a/llvm/test/CodeGen/AArch64/min-jump-table.ll b/llvm/test/CodeGen/AArch64/min-jump-table.ll
index 3b3f79746de0ab7..98b89210f5a05f6 100644
--- a/llvm/test/CodeGen/AArch64/min-jump-table.ll
+++ b/llvm/test/CodeGen/AArch64/min-jump-table.ll
@@ -123,6 +123,46 @@ bb12: tail call void @ext(i32 12, i32 22) br label %return
 return: ret i32 %b
 }
 
+define i32 @jt12_min_size(i32 %a, i32 %b) minsize {
+entry:
+  switch i32 %a, label %return [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+    i32 9, label %bb9
+    i32 10, label %bb10
+    i32 11, label %bb11
+    i32 12, label %bb12
+  ]
+; CHECK-LABEL: function jt12_min_size:
+; CHECK0-NEXT: Jump Tables:
+; CHECK2-NEXT: Jump Tables:
+; CHECK4-NEXT: Jump Tables:
+; CHECK8-NEXT: Jump Tables:
+; CHECK12-NEXT: Jump Tables:
+; CHECK-DEFAULT: Jump Tables:
+
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
+bb3: tail call void @ext(i32 3, i32 4) br label %return
+bb4: tail call void @ext(i32 4, i32 6) br label %return
+bb5: tail call void @ext(i32 5, i32 8) br label %return
+bb6: tail call void @ext(i32 6, i32 10) br label %return
+bb7: tail call void @ext(i32 7, i32 12) br label %return
+bb8: tail call void @ext(i32 8, i32 14) br label %return
+bb9: tail call void @ext(i32 9, i32 16) br label %return
+bb10: tail call void @ext(i32 10, i32 18) br label %return
+bb11: tail call void @ext(i32 11, i32 20) br label %return
+bb12: tail call void @ext(i32 12, i32 22) br label %return
+
+return: ret i32 %b
+}
+
 define i32 @jt13(i32 %a, i32 %b) {
 entry:
   switch i32 %a, label %return [

>From 5514ecd741c111517a2f746bb1ab414c4b9b6bec Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 13 Nov 2023 08:48:41 +0000
Subject: [PATCH 3/5] Let the subtarget decide the minimum at construction

Instead of querying the function attributes every time, we can
let the subtarget decide once at construction.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  3 +--
 llvm/include/llvm/CodeGen/TargetLowering.h    |  2 +-
 llvm/lib/CodeGen/SwitchLoweringUtils.cpp      |  5 ++---
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  3 +--
 .../Target/AArch64/AArch64ISelLowering.cpp    | 13 ++-----------
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  4 ++--
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  | 19 ++++++++++++++-----
 llvm/lib/Target/AArch64/AArch64Subtarget.h    | 10 +++++++---
 .../Target/AArch64/AArch64TargetMachine.cpp   |  6 ++++--
 llvm/lib/Target/VE/VEISelLowering.cpp         |  4 ++--
 llvm/lib/Target/VE/VEISelLowering.h           |  2 +-
 11 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 7e91f263c97ed18..7a8f36da58ceccb 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -470,8 +470,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
     // Check if suitable for a jump table.
     if (IsJTAllowed) {
-      if (N < 2 ||
-          N < TLI->getMinimumJumpTableEntries(SI.getParent()->getParent()))
+      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
         return N;
       uint64_t Range =
           (MaxCaseVal - MinCaseVal)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 48927f5cbf78348..c87537291e3b161 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1904,7 +1904,7 @@ class TargetLoweringBase {
   virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
 
   /// Return lower limit for number of blocks in a jump table.
-  virtual unsigned getMinimumJumpTableEntries(const Function *F) const;
+  virtual unsigned getMinimumJumpTableEntries() const;
 
   /// Return lower limit of the density in a jump table.
   unsigned getMinimumJumpTableDensity(bool OptForSize) const;
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 165a9ec5af357c3..b01a8bed0a394b6 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -58,11 +58,10 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
 #endif
 
   assert(TLI && "TLI not set!");
-  const Function *F = SI->getParent()->getParent();
-  if (!TLI->areJTsAllowed(F))
+  if (!TLI->areJTsAllowed(SI->getParent()->getParent()))
     return;
 
-  const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(F);
+  const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries();
   const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
 
   // Bail if not enough cases.
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 31f48a2ef477131..722cefb1eddb3c5 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2035,8 +2035,7 @@ Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
   return nullptr;
 }
 
-unsigned
-TargetLoweringBase::getMinimumJumpTableEntries(const Function *F) const {
+unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
   return MinimumJumpTableEntries;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f3f78dc064490e7..5d56a32be495867 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -144,10 +144,6 @@ static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
                                  cl::desc("Maximum of xors"));
 
-static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
-    "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
-    cl::desc("Set minimum number of entries to use a jump table on AArch64"));
-
 /// Value type used for condition codes.
 static const MVT MVT_CC = MVT::i32;
 
@@ -26549,11 +26545,6 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
   return true;
 }
 
-unsigned
-AArch64TargetLowering::getMinimumJumpTableEntries(const Function *F) const {
-  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 ||
-      !F->hasMinSize())
-    return AArch64MinimumJumpTableEntries;
-
-  return TargetLoweringBase::getMinimumJumpTableEntries(F);
+unsigned AArch64TargetLowering::getMinimumJumpTableEntries() const {
+  return Subtarget->getMinimumJumpTableEntries();
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index efcb2a7b11a203d..f7d004fa3cbcc3a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -853,8 +853,6 @@ class AArch64TargetLowering : public TargetLowering {
   bool isComplexDeinterleavingOperationSupported(
       ComplexDeinterleavingOperation Operation, Type *Ty) const override;
 
-  unsigned getMinimumJumpTableEntries(const Function *F) const override;
-
   Value *createComplexDeinterleavingIR(
       IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
       ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
@@ -1252,6 +1250,8 @@ class AArch64TargetLowering : public TargetLowering {
                       SDLoc DL, EVT VT) const;
 
   bool preferScalarizeSplat(SDNode *N) const override;
+
+  unsigned getMinimumJumpTableEntries() const override;
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index e3c3bff8e32984e..e085124ad3bac4e 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -77,6 +77,10 @@ static cl::opt<AArch64PAuth::AuthCheckMethod>
                                         "to authenticated LR during tail call"),
                                cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));
 
+static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
+    "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
+    cl::desc("Set minimum number of entries to use a jump table on AArch64"));
+
 unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
   if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
     return OverrideVectorInsertExtractBaseCost;
@@ -84,7 +88,7 @@ unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
 }
 
 AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
-    StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
+    StringRef FS, StringRef CPUString, StringRef TuneCPUString, bool HasMinSize) {
   // Determine default and user-specified characteristics
 
   if (CPUString.empty())
@@ -94,12 +98,12 @@ AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
     TuneCPUString = CPUString;
 
   ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
-  initializeProperties();
+  initializeProperties(HasMinSize);
 
   return *this;
 }
 
-void AArch64Subtarget::initializeProperties() {
+void AArch64Subtarget::initializeProperties(bool HasMinSize) {
   // Initialize CPU specific properties. We should add a tablegen feature for
   // this in the future so we can specify it together with the subtarget
   // features.
@@ -292,6 +296,10 @@ void AArch64Subtarget::initializeProperties() {
     MaxInterleaveFactor = 4;
     break;
   }
+
+  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 ||
+      !HasMinSize)
+    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
 }
 
 AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
@@ -300,7 +308,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                    unsigned MinSVEVectorSizeInBitsOverride,
                                    unsigned MaxSVEVectorSizeInBitsOverride,
                                    bool StreamingSVEMode,
-                                   bool StreamingCompatibleSVEMode)
+                                   bool StreamingCompatibleSVEMode,
+                                   bool HasMinSize)
     : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
       ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
@@ -310,7 +319,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
       StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
-      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
+      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
       TLInfo(TM, *this) {
   if (AArch64::isX18ReservedByDefault(TT))
     ReserveXRegister.set(18);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b91c5c81ed4d274..a6c2f4f7fa28e09 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -112,6 +112,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   Align PrefFunctionAlignment;
   Align PrefLoopAlignment;
   unsigned MaxBytesForLoopAlignment = 0;
+  unsigned MinimumJumpTableEntries = 4;
   unsigned MaxJumpTableSize = 0;
 
   // ReserveXRegister[i] - X#i is not available as a general purpose register.
@@ -153,10 +154,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// subtarget initialization.
   AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
                                                     StringRef CPUString,
-                                                    StringRef TuneCPUString);
+                                                    StringRef TuneCPUString,
+                                                    bool HasMinSize);
 
   /// Initialize properties based on the selected processor family.
-  void initializeProperties();
+  void initializeProperties(bool HasMinSize);
 
 public:
   /// This constructor initializes the data members to match that
@@ -166,7 +168,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
                    unsigned MinSVEVectorSizeInBitsOverride = 0,
                    unsigned MaxSVEVectorSizeInBitsOverride = 0,
                    bool StreamingSVEMode = false,
-                   bool StreamingCompatibleSVEMode = false);
+                   bool StreamingCompatibleSVEMode = false,
+                   bool HasMinSize = false);
 
 // Getters for SubtargetFeatures defined in tablegen
 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
@@ -274,6 +277,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   }
 
   unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
+  unsigned getMinimumJumpTableEntries() const { return MinimumJumpTableEntries; }
 
   /// CPU has TBI (top byte of addresses is ignored during HW address
   /// translation) and OS enables it.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 3d818c76bd4b7d7..c21bb247d24c90c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -397,6 +397,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
   StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : TargetCPU;
   StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
   StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
+  bool HasMinSize = F.hasMinSize();
 
   bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
                           F.hasFnAttribute("aarch64_pstate_sm_body");
@@ -433,7 +434,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
                            << "StreamingSVEMode=" << StreamingSVEMode
                            << "StreamingCompatibleSVEMode="
                            << StreamingCompatibleSVEMode << CPU << TuneCPU
-                           << FS;
+                           << FS << "HasMinSize=" << HasMinSize;
 
   auto &I = SubtargetMap[Key];
   if (!I) {
@@ -443,7 +444,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
     resetTargetOptions(F);
     I = std::make_unique<AArch64Subtarget>(
         TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
-        MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode);
+        MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode,
+        HasMinSize);
   }
 
   assert((!StreamingSVEMode || I->hasSME()) &&
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index c2b1ae029441802..0267aefd1e914e6 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -3085,12 +3085,12 @@ VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 // VE Target Optimization Support
 //===----------------------------------------------------------------------===//
 
-unsigned VETargetLowering::getMinimumJumpTableEntries(const Function *F) const {
+unsigned VETargetLowering::getMinimumJumpTableEntries() const {
   // Specify 8 for PIC model to relieve the impact of PIC load instructions.
   if (isJumpTableRelative())
     return 8;
 
-  return TargetLowering::getMinimumJumpTableEntries(F);
+  return TargetLowering::getMinimumJumpTableEntries();
 }
 
 bool VETargetLowering::hasAndNot(SDValue Y) const {
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 02f6cff782c476c..8b9412d786625d1 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -326,7 +326,7 @@ class VETargetLowering : public TargetLowering {
   /// Target Optimization {
 
   // Return lower limit for number of blocks in a jump table.
-  unsigned getMinimumJumpTableEntries(const Function *F) const override;
+  unsigned getMinimumJumpTableEntries() const override;
 
   // SX-Aurora VE's s/udiv is 5-9 times slower than multiply.
   bool isIntDivCheap(EVT, AttributeList) const override { return false; }

>From 328d6800885fd9efc6975859094bddafb197cc9d Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 13 Nov 2023 09:35:12 +0000
Subject: [PATCH 4/5] Fix clang format

---
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index e085124ad3bac4e..216a96ea97f8083 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -314,8 +314,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
       ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
-      IsLittle(LittleEndian),
-      StreamingSVEMode(StreamingSVEMode),
+      IsLittle(LittleEndian), StreamingSVEMode(StreamingSVEMode),
       StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),

>From 6020ea9e3b90d1f15d6c210788ef4338309b7ba8 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 14 Nov 2023 11:40:26 +0000
Subject: [PATCH 5/5] Really fix the clang formatting issues

---
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp     | 6 +++---
 llvm/lib/Target/AArch64/AArch64Subtarget.h       | 4 +++-
 llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 4 ++--
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 216a96ea97f8083..ff14fcf5dfcc0f6 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -88,7 +88,8 @@ unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
 }
 
 AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
-    StringRef FS, StringRef CPUString, StringRef TuneCPUString, bool HasMinSize) {
+    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
+    bool HasMinSize) {
   // Determine default and user-specified characteristics
 
   if (CPUString.empty())
@@ -297,8 +298,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
     break;
   }
 
-  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 ||
-      !HasMinSize)
+  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
     MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index a6c2f4f7fa28e09..b2ee2e76d0e8ea8 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -277,7 +277,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   }
 
   unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
-  unsigned getMinimumJumpTableEntries() const { return MinimumJumpTableEntries; }
+  unsigned getMinimumJumpTableEntries() const {
+    return MinimumJumpTableEntries;
+  }
 
   /// CPU has TBI (top byte of addresses is ignored during HW address
   /// translation) and OS enables it.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index c21bb247d24c90c..d418a297218eb06 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -433,8 +433,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
                            << MaxSVEVectorSize
                            << "StreamingSVEMode=" << StreamingSVEMode
                            << "StreamingCompatibleSVEMode="
-                           << StreamingCompatibleSVEMode << CPU << TuneCPU
-                           << FS << "HasMinSize=" << HasMinSize;
+                           << StreamingCompatibleSVEMode << CPU << TuneCPU << FS
+                           << "HasMinSize=" << HasMinSize;
 
   auto &I = SubtargetMap[Key];
   if (!I) {



More information about the llvm-commits mailing list