[clang] [llvm] [JITLink][AArch32] Implement Armv5 ldr-pc stubs and use them for all pre-v7 targets (PR #79082)

Stefan Gränitz via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 23 06:48:14 PST 2024


https://github.com/weliveindetail updated https://github.com/llvm/llvm-project/pull/79082

>From c206fb211666e77cbe6aeb806174774f5db1a2ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= <stefan.graenitz at gmail.com>
Date: Tue, 23 Jan 2024 02:35:27 +0100
Subject: [PATCH 1/4] [JITLink][AArch32] Implement Armv5 ldr-pc stubs and use
 them for all pre-v7 targets

---
 .../llvm/ExecutionEngine/JITLink/aarch32.h    | 50 +++++++++++----
 .../ExecutionEngine/JITLink/ELF_aarch32.cpp   | 23 +++----
 llvm/lib/ExecutionEngine/JITLink/aarch32.cpp  | 62 ++++++++++++++++---
 3 files changed, 99 insertions(+), 36 deletions(-)

diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
index ed53fa409ade895..30fb0d2d92aaa44 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
@@ -131,14 +131,15 @@ const char *getEdgeKindName(Edge::Kind K);
 /// Stubs are often called "veneers" in the official docs and online.
 ///
 enum class StubsFlavor {
-  Unsupported = 0,
+  Undefined = 0,
+  pre_v7,
   v7,
 };
 
 /// JITLink sub-arch configuration for Arm CPU models
 struct ArmConfig {
   bool J1J2BranchEncoding = false;
-  StubsFlavor Stubs = StubsFlavor::Unsupported;
+  StubsFlavor Stubs = StubsFlavor::Undefined;
   // In the long term, we might want a linker switch like --target1-rel
   bool Target1Rel = false;
 };
@@ -146,18 +147,12 @@ struct ArmConfig {
 /// Obtain the sub-arch configuration for a given Arm CPU model.
 inline ArmConfig getArmConfigForCPUArch(ARMBuildAttrs::CPUArch CPUArch) {
   ArmConfig ArmCfg;
-  switch (CPUArch) {
-  case ARMBuildAttrs::v7:
-  case ARMBuildAttrs::v8_A:
+  if (CPUArch == ARMBuildAttrs::v7 || CPUArch >= ARMBuildAttrs::v7E_M) {
     ArmCfg.J1J2BranchEncoding = true;
     ArmCfg.Stubs = StubsFlavor::v7;
-    break;
-  default:
-    DEBUG_WITH_TYPE("jitlink", {
-      dbgs() << "  Warning: ARM config not defined for CPU architecture "
-             << getCPUArchName(CPUArch) << " (" << CPUArch << ")\n";
-    });
-    break;
+  } else {
+    ArmCfg.J1J2BranchEncoding = false;
+    ArmCfg.Stubs = StubsFlavor::pre_v7;
   }
   return ArmCfg;
 }
@@ -341,6 +336,37 @@ class GOTBuilder : public TableManager<GOTBuilder> {
   Section *GOTSection = nullptr;
 };
 
+/// Stubs builder emits non-position-independent Arm stubs for pre-v7 CPUs.
+/// These architectures have no MovT/MovW instructions and don't support Thumb2.
+/// BL is the only Thumb instruction that can generate stubs and they can always
+/// be transformed into BLX.
+class StubsManager_prev7 : public TableManager<StubsManager_prev7> {
+public:
+  StubsManager_prev7() = default;
+
+  /// Name of the object file section that will contain all our stubs.
+  static StringRef getSectionName() {
+    return "__llvm_jitlink_aarch32_STUBS_prev7";
+  }
+
+  /// Implements link-graph traversal via visitExistingEdges()
+  bool visitEdge(LinkGraph &G, Block *B, Edge &E);
+
+  /// Create an Arm stub for pre-v7 CPUs
+  Symbol &createEntry(LinkGraph &G, Symbol &Target);
+
+private:
+  /// Get or create the object file section that will contain all our stubs
+  Section &getStubsSection(LinkGraph &G) {
+    if (!StubsSection)
+      StubsSection = &G.createSection(getSectionName(),
+                                      orc::MemProt::Read | orc::MemProt::Exec);
+    return *StubsSection;
+  }
+
+  Section *StubsSection = nullptr;
+};
+
 /// Stubs builder for v7 emits non-position-independent Arm and Thumb stubs.
 class StubsManager_v7 {
 public:
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
index 15c209e1ebe5bf6..c1f923d69c52d76 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
@@ -265,21 +265,8 @@ createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) {
   // Resolve our internal configuration for the target. If at some point the
   // CPUArch alone becomes too unprecise, we can find more details in the
   // Tag_CPU_arch_profile.
-  aarch32::ArmConfig ArmCfg;
-  using namespace ARMBuildAttrs;
-  auto Arch = static_cast<CPUArch>(ARM::getArchAttr(AK));
-  switch (Arch) {
-  case v7:
-  case v8_A:
-    ArmCfg = aarch32::getArmConfigForCPUArch(Arch);
-    assert(ArmCfg.Stubs != aarch32::StubsFlavor::Unsupported &&
-           "Provide a config for each supported CPU");
-    break;
-  default:
-    return make_error<JITLinkError>(
-        "Failed to build ELF link graph: Unsupported CPU arch " +
-        StringRef(aarch32::getCPUArchName(Arch)));
-  }
+  auto Arch = static_cast<ARMBuildAttrs::CPUArch>(ARM::getArchAttr(AK));
+  aarch32::ArmConfig ArmCfg = aarch32::getArmConfigForCPUArch(Arch);
 
   // Populate the link-graph.
   switch (TT.getArch()) {
@@ -324,11 +311,15 @@ void link_ELF_aarch32(std::unique_ptr<LinkGraph> G,
       PassCfg.PrePrunePasses.push_back(markAllSymbolsLive);
 
     switch (ArmCfg.Stubs) {
+    case aarch32::StubsFlavor::pre_v7:
+      PassCfg.PostPrunePasses.push_back(
+          buildTables_ELF_aarch32<aarch32::StubsManager_prev7>);
+      break;
     case aarch32::StubsFlavor::v7:
       PassCfg.PostPrunePasses.push_back(
           buildTables_ELF_aarch32<aarch32::StubsManager_v7>);
       break;
-    case aarch32::StubsFlavor::Unsupported:
+    case aarch32::StubsFlavor::Undefined:
       llvm_unreachable("Check before building graph");
     }
   }
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
index 9508cde07b42a65..27f73d3c461cfa9 100644
--- a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
@@ -725,6 +725,60 @@ bool GOTBuilder::visitEdge(LinkGraph &G, Block *B, Edge &E) {
   return true;
 }
 
+/// Create a new node in the link-graph for the given stub template.
+template <size_t Size>
+static Block &allocStub(LinkGraph &G, Section &S, const uint8_t (&Code)[Size]) {
+  constexpr uint64_t Alignment = 4;
+  ArrayRef<char> Template(reinterpret_cast<const char *>(Code), Size);
+  return G.createContentBlock(S, Template, orc::ExecutorAddr(), Alignment, 0);
+}
+
+const uint8_t Armv5LongLdrPc[] = {
+    0x04, 0xf0, 0x1f, 0xe5, // ldr pc, [pc,#-4] ; L1
+    0x00, 0x00, 0x00, 0x00, // L1: .word S
+};
+
+    // TODO: There is only ARM far stub now. We should add the Thumb stub,
+    // and stubs for branches Thumb - ARM and ARM - Thumb.
+//    writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4]
+
+Symbol &StubsManager_prev7::createEntry(LinkGraph &G, Symbol &Target) {
+  Block &B = allocStub(G, getStubsSection(G), Armv5LongLdrPc);
+  //LLVM_DEBUG({
+  //  const char *StubPtr = B.getContent().data();
+  //  HalfWords Reg12 = encodeRegMovtT1MovwT3(12);
+  //  assert(checkRegister<Thumb_MovwAbsNC>(StubPtr, Reg12) &&
+  //         checkRegister<Thumb_MovtAbs>(StubPtr + 4, Reg12) &&
+  //         "Linker generated stubs may only corrupt register r12 (IP)");
+  //});
+  B.addEdge(Data_Pointer32, 4, Target, 0);
+  return G.addAnonymousSymbol(B, 0, B.getSize(), true, false);
+}
+
+bool StubsManager_prev7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
+  if (E.getTarget().isDefined())
+    return false;
+
+  switch (E.getKind()) {
+  case Arm_Call:
+  case Arm_Jump24: {
+    DEBUG_WITH_TYPE("jitlink", {
+      dbgs() << "  Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+              << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+              << formatv("{0:x}", E.getOffset()) << ")\n";
+    });
+    E.setTarget(this->getEntryForTarget(G, E.getTarget()));
+    return true;
+  }
+  case Thumb_Call:
+  case Thumb_Jump24:
+    // BL is never out-of-range and can always be rewritten to BLX inline.
+    // B cannot target an external.
+    break;
+  }
+  return false;
+}
+
 const uint8_t Armv7ABS[] = {
     0x00, 0xc0, 0x00, 0xe3, // movw r12, #0x0000     ; lower 16-bit
     0x00, 0xc0, 0x40, 0xe3, // movt r12, #0x0000     ; upper 16-bit
@@ -737,14 +791,6 @@ const uint8_t Thumbv7ABS[] = {
     0x60, 0x47              // bx   r12
 };
 
-/// Create a new node in the link-graph for the given stub template.
-template <size_t Size>
-static Block &allocStub(LinkGraph &G, Section &S, const uint8_t (&Code)[Size]) {
-  constexpr uint64_t Alignment = 4;
-  ArrayRef<char> Template(reinterpret_cast<const char *>(Code), Size);
-  return G.createContentBlock(S, Template, orc::ExecutorAddr(), Alignment, 0);
-}
-
 static Block &createStubThumbv7(LinkGraph &G, Section &S, Symbol &Target) {
   Block &B = allocStub(G, S, Thumbv7ABS);
   B.addEdge(Thumb_MovwAbsNC, 0, Target, 0);

>From 646898b647a8facd7abba0436f912585d5827bc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= <stefan.graenitz at gmail.com>
Date: Tue, 23 Jan 2024 02:36:52 +0100
Subject: [PATCH 2/4] [JITLink][AArch32] Expand tests to non-v7 targets

---
 .../JITLink/AArch32/ELF_relocations_arm.s     |  54 +++----
 .../AArch32/ELF_relocations_armv7plus.s       |  49 ++++++
 .../JITLink/AArch32/ELF_relocations_data.s    |   7 +-
 .../JITLink/AArch32/ELF_relocations_thumb.s   | 145 ------------------
 .../AArch32/ELF_relocations_thumbv6m.s        |  60 ++++++++
 .../AArch32/ELF_relocations_thumbv7a.s        |  45 ++++++
 .../AArch32/ELF_relocations_thumbv7m.s        | 107 +++++++++++++
 .../JITLink/AArch32/ELF_stubs_arm.s           |  30 ++--
 .../JITLink/AArch32/ELF_stubs_thumb.s         |  23 ++-
 9 files changed, 321 insertions(+), 199 deletions(-)
 create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_armv7plus.s
 delete mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumb.s
 create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv6m.s
 create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv7a.s
 create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv7m.s

diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_arm.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_arm.s
index 6fd383e2cce5c9f..3dec8c96f5cd575 100644
--- a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_arm.s
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_arm.s
@@ -1,8 +1,22 @@
-# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t.o %s
-# RUN: llvm-objdump -r %t.o | FileCheck --check-prefix=CHECK-TYPE %s
-# RUN: llvm-objdump --disassemble %t.o | FileCheck --check-prefix=CHECK-INSTR %s
+# Test pre-v7 Arm features
+#
+# RUN: llvm-mc -triple=armv4t-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv4t.o %s
+# RUN: llvm-objdump -r %t_armv4t.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_armv4t.o | FileCheck --check-prefix=CHECK-INSTR %s
 # RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
-# RUN:              -slab-page-size 4096 -show-entry-es -check %s %t.o
+# RUN:              -slab-page-size 4096 -check %s %t_armv4t.o
+#
+# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv7.o %s
+# RUN: llvm-objdump -r %t_armv7.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_armv7.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -check %s %t_armv7.o
+#
+# RUN: llvm-mc -triple=armv9-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv9.o %s
+# RUN: llvm-objdump -r %t_armv9.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_armv9.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -check %s %t_armv9.o
 
 
 	.text
@@ -63,38 +77,6 @@ jump24_target:
 	bx	lr
 	.size	jump24_target,	.-jump24_target
 
-
-# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVW_ABS_NC data_symbol
-# CHECK-INSTR: 	0000001c <movw>:
-# CHECK-INSTR: 	      1c: e3000000     movw      r0, #0x0
-# jitlink-check: decode_operand(movw, 1) = (data_symbol&0x0000ffff)
-	.globl	movw
-	.type	movw,%function
-	.p2align	2
-movw:
-	movw r0, :lower16:data_symbol
-	.size	movw,	.-movw
-
-# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVT_ABS data_symbol
-# CHECK-INSTR: 	00000020 <movt>:
-# CHECK-INSTR: 	      20: e3400000     movt      r0, #0x0
-# We decode the operand with index 2, because movt generates one leading implicit
-# predicate operand that we have to skip in order to decode the data_symbol operand
-# jitlink-check: decode_operand(movt, 2) = (data_symbol&0xffff0000>>16)
-	.globl	movt
-	.type	movt,%function
-	.p2align	2
-movt:
-	movt r0, :upper16:data_symbol
-	.size	movt,	.-movt
-
-	.data
-	.global data_symbol
-data_symbol:
-	.long 1073741822
-
-	.text
-
 # Empty main function for jitlink to be happy
 	.globl	main
 	.type	main,%function
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_armv7plus.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_armv7plus.s
new file mode 100644
index 000000000000000..890b2136959ef12
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_armv7plus.s
@@ -0,0 +1,49 @@
+# Test v7 Arm features
+#
+# RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv7.o %s
+# RUN: llvm-objdump -r %t_armv7.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_armv7.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -abs data_symbol=0x00001234 -check %s %t_armv7.o
+#
+# RUN: llvm-mc -triple=armv9-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_armv9.o %s
+# RUN: llvm-objdump -r %t_armv9.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_armv9.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -abs data_symbol=0x00001234 -check %s %t_armv9.o
+
+
+	.text
+	.syntax unified
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVW_ABS_NC data_symbol
+# CHECK-INSTR: <movw>:
+# CHECK-INSTR: e3000000 movw r0, #0x0
+# jitlink-check: decode_operand(movw, 1) = data_symbol[15:0]
+	.globl	movw
+	.type	movw,%function
+	.p2align	2
+movw:
+	movw r0, :lower16:data_symbol
+	.size	movw,	.-movw
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_MOVT_ABS data_symbol
+# CHECK-INSTR: <movt>:
+# CHECK-INSTR: e3400000 movt r0, #0x0
+# We decode the operand with index 2, because movt generates one leading implicit
+# predicate operand that we have to skip in order to decode the data_symbol operand
+# jitlink-check: decode_operand(movt, 2) = data_symbol[31:16]
+	.globl	movt
+	.type	movt,%function
+	.p2align	2
+movt:
+	movt r0, :upper16:data_symbol
+	.size	movt,	.-movt
+
+# Empty main function for jitlink to be happy
+	.globl	main
+	.type	main,%function
+	.p2align	2
+main:
+	bx	lr
+	.size	main,	.-main
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_data.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_data.s
index 7bd59f8a52de6d8..590ca816ecb9eb2 100644
--- a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_data.s
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_data.s
@@ -1,4 +1,9 @@
-# RUN: rm -rf %t && mkdir -p %t/armv7 && mkdir -p %t/thumbv7
+# RUN: rm -rf %t && mkdir -p %t/armv6 && mkdir -p %t/armv7 && mkdir -p %t/thumbv7
+# RUN: llvm-mc -triple=armv6-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t/armv6/out.o %s
+# RUN: llvm-objdump -r %t/armv6/out.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 \
+# RUN:              -abs target=0x76bbe88f -check %s %t/armv6/out.o
+
 # RUN: llvm-mc -triple=armv7-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t/armv7/out.o %s
 # RUN: llvm-objdump -r %t/armv7/out.o | FileCheck --check-prefix=CHECK-TYPE %s
 # RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 \
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumb.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumb.s
deleted file mode 100644
index 86f011834baae9f..000000000000000
--- a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumb.s
+++ /dev/null
@@ -1,145 +0,0 @@
-# RUN: llvm-mc -triple=thumbv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t.o %s
-# RUN: llvm-objdump -r %t.o | FileCheck --check-prefix=CHECK-TYPE %s
-# RUN: llvm-objdump --disassemble %t.o | FileCheck --check-prefix=CHECK-INSTR %s
-# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
-# RUN:              -slab-page-size 4096 -abs external_func=0x76bbe880 \
-# RUN:              -check %s %t.o
-
-
-	.text
-	.syntax unified
-
-# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_CALL call_target_thumb
-# CHECK-INSTR: 	00000000 <call_site>:
-# CHECK-INSTR: 	       0: f7ff fffe     bl
-# CHECK-INSTR: 	       4: f7ff fffe     bl
-# CHECK-INSTR: 	00000008 <call_target_thumb>
-# CHECK-INSTR: 	0000000c <call_target_arm>
-# We decode the operand with index 2, because bl generates two leading implicit
-# predicate operands that we have to skip in order to decode the call_target operand
-# jitlink-check: decode_operand(call_site + 0, 2) = call_target_thumb - (call_site + 4)
-# jitlink-check: decode_operand(call_site + 4, 2) = call_target_arm   - (call_site + 8)
-	.globl	call_site
-	.type	call_site,%function
-	.p2align	1
-	.code	16
-	.thumb_func
-call_site:
-	bl	call_target_thumb
-	bl	call_target_arm
-	.size	call_site, .-call_site
-
-	.globl	call_target_thumb
-	.type	call_target_thumb,%function
-	.p2align	1
-	.code	16
-	.thumb_func
-call_target_thumb:
-	bx	lr
-	.size	call_target_thumb, .-call_target_thumb
-
-	.globl	call_target_arm
-	.type	call_target_arm,%function
-	.p2align	2
-	.code	32
-call_target_arm:
-	bx	lr
-	.size	call_target_arm, .-call_target_arm
-
-# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_JUMP24 jump24_target
-# CHECK-INSTR: 	00000010 <jump24_site>:
-# CHECK-INSTR: 	      10: f7ff bffe     b.w
-# CHECK-INSTR: 	00000014 <jump24_target>
-# b.w generates two implicit predicate operands as well, but they are trailing
-# operands, so there is no need to adjust the operand index.
-# jitlink-check: decode_operand(jump24_site, 0) = jump24_target - next_pc(jump24_site)
-	.globl	jump24_site
-	.type	jump24_site,%function
-	.p2align	1
-	.code	16
-	.thumb_func
-jump24_site:
-	b.w	jump24_target
-	.size	jump24_site,	.-jump24_site
-
-	.globl	jump24_target
-	.type	jump24_target,%function
-	.p2align	1
-	.code	16
-	.thumb_func
-jump24_target:
-	bx	lr
-	.size	jump24_target,	.-jump24_target
-
-# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_MOVW_ABS_NC data_symbol
-# CHECK-INSTR: 	00000016 <movw>:
-# CHECK-INSTR: 	      16: f240 0000     movw    r0, #0x0
-# jitlink-check: decode_operand(movw, 1) = (data_symbol&0x0000ffff)
-	.globl	movw
-	.type	movw,%function
-	.p2align	1
-	.code	16
-	.thumb_func
-movw:
-	movw r0, :lower16:data_symbol
-	.size	movw,	.-movw
-
-# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_MOVT_ABS data_symbol
-# CHECK-INSTR: 	0000001a <movt>:
-# CHECK-INSTR: 	      1a: f2c0 0000     movt    r0, #0x0
-# We decode the operand with index 2, because movt generates one leading implicit
-# predicate operand that we have to skip in order to decode the data_symbol operand
-# jitlink-check: decode_operand(movt, 2) = (data_symbol&0xffff0000>>16)
-	.globl	movt
-	.type	movt,%function
-	.p2align	1
-	.code	16
-	.thumb_func
-movt:
-	movt r0, :upper16:data_symbol
-	.size	movt,	.-movt
-
-	.data
-	.global data_symbol
-data_symbol:
-	.long 1073741822
-
-	.text
-
-# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_MOVW_PREL_NC external_func
-# CHECK-INSTR: 	0000001e <movw_prel>:
-# CHECK-INSTR: 	      1e: f240 0000     movw    r0, #0x0
-# jitlink-check: decode_operand(movw_prel, 1) = \
-# jitlink-check:              ((external_func - movw_prel)&0x0000ffff)
-.globl	movw_prel
-.type	movw_prel,%function
-.p2align	1
-.code	16
-.thumb_func
-movw_prel:
-	movw r0, :lower16:external_func - .
-	.size	movw_prel,	.-movw_prel
-
-# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_MOVT_PREL external_func 
-# CHECK-INSTR: 	00000022 <movt_prel>:
-# CHECK-INSTR: 	      22: f2c0 0000    movt    r0, #0x0
-# jitlink-check: decode_operand(movt_prel, 2) = \
-# jitlink-check:               ((external_func - movt_prel)&0xffff0000>>16)
-.globl	movt_prel
-.type	movt_prel,%function
-.p2align	1
-.code	16
-.thumb_func
-movt_prel:
-	movt r0, :upper16:external_func - .
-	.size	movt_prel,	.-movt_prel
-
-# Empty main function for jitlink to be happy
-	.globl	main
-	.type	main,%function
-	.p2align	1
-	.code	16
-	.thumb_func
-main:
-	bx	lr
-	.size	main,	.-main
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv6m.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv6m.s
new file mode 100644
index 000000000000000..e0a224d9c710664
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv6m.s
@@ -0,0 +1,60 @@
+# Test pre-v7 Thumb features for Thumb-only targets
+#
+# RUN: llvm-mc -triple=thumbv6m-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_thumbv6m.o %s
+# RUN: llvm-objdump -r %t_thumbv6m.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_thumbv6m.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -abs external_func=0x76bbe880 \
+# RUN:              -check %s %t_thumbv6m.o
+#
+# RUN: llvm-mc -triple=thumbv7m-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_thumbv7m.o %s
+# RUN: llvm-objdump -r %t_thumbv7m.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_thumbv7m.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -abs external_func=0x76bbe880 \
+# RUN:              -check %s %t_thumbv7m.o
+#
+# RUN: llvm-mc -triple=thumbv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_thumbv7.o %s
+# RUN: llvm-objdump -r %t_thumbv7.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_thumbv7.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -abs external_func=0x76bbe880 \
+# RUN:              -check %s %t_thumbv7.o
+
+
+	.text
+	.syntax unified
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_CALL call_target_thumb
+# CHECK-INSTR: <call_site>:
+# CHECK-INSTR: f7ff fffe     bl
+# We decode the operand with index 2, because bl generates two leading implicit
+# predicate operands that we have to skip in order to decode the call_target operand
+# jitlink-check: decode_operand(call_site, 2) = call_target_thumb - (call_site + 4)
+	.globl	call_site
+	.type	call_site,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+call_site:
+	bl	call_target_thumb
+	.size	call_site, .-call_site
+
+	.globl	call_target_thumb
+	.type	call_target_thumb,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+call_target_thumb:
+	bx	lr
+	.size	call_target_thumb, .-call_target_thumb
+
+# Empty main function for jitlink to be happy
+	.globl	main
+	.type	main,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+main:
+	bx	lr
+	.size	main,	.-main
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv7a.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv7a.s
new file mode 100644
index 000000000000000..0e4a2cfb2c34965
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv7a.s
@@ -0,0 +1,45 @@
+# Test v7 Thumb features for mixed Arm/Thumb targets
+#
+# RUN: llvm-mc -triple=thumbv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_thumbv7.o %s
+# RUN: llvm-objdump -r %t_thumbv7.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_thumbv7.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -abs external_func=0x76bbe880 \
+# RUN:              -check %s %t_thumbv7.o
+
+
+	.text
+	.syntax unified
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_CALL call_target_arm
+# CHECK-INSTR: <call_site>:
+# CHECK-INSTR: f7ff fffe     bl
+# We decode the operand with index 2, because bl generates two leading implicit
+# predicate operands that we have to skip in order to decode the call_target operand
+# jitlink-check: decode_operand(call_site, 2) = call_target_arm - next_pc(call_site)
+	.globl	call_site
+	.type	call_site,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+call_site:
+	bl	call_target_arm
+	.size	call_site, .-call_site
+
+	.globl	call_target_arm
+	.type	call_target_arm,%function
+	.p2align	2
+	.code	32
+call_target_arm:
+	bx	lr
+	.size	call_target_arm, .-call_target_arm
+
+# Empty main function for jitlink to be happy
+	.globl	main
+	.type	main,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+main:
+	bx	lr
+	.size	main,	.-main
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv7m.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv7m.s
new file mode 100644
index 000000000000000..4997fb3cf8ab1d4
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_relocations_thumbv7m.s
@@ -0,0 +1,107 @@
+# Test v7 Thumb features for Thumb-only targets
+#
+# RUN: llvm-mc -triple=thumbv7m-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_thumbv7m.o %s
+# RUN: llvm-objdump -r %t_thumbv7m.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_thumbv7m.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -abs ext_func=0x76bbe880 -abs ext_data=0x00001234 \
+# RUN:              -check %s %t_thumbv7m.o
+#
+# RUN: llvm-mc -triple=thumbv7-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t_thumbv7.o %s
+# RUN: llvm-objdump -r %t_thumbv7.o | FileCheck --check-prefix=CHECK-TYPE %s
+# RUN: llvm-objdump --disassemble %t_thumbv7.o | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb \
+# RUN:              -slab-page-size 4096 -abs ext_func=0x76bbe880 -abs ext_data=0x00001234 \
+# RUN:              -check %s %t_thumbv7.o
+
+	.text
+	.syntax unified
+
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_JUMP24 jump24_target
+# CHECK-INSTR: <jump24_site>:
+# CHECK-INSTR: f7ff bffe     b.w
+# b.w generates two implicit predicate operands as well, but they are trailing
+# operands, so there is no need to adjust the operand index.
+# jitlink-check: decode_operand(jump24_site, 0) = jump24_target - next_pc(jump24_site)
+	.globl	jump24_site
+	.type	jump24_site,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+jump24_site:
+	b.w	jump24_target
+	.size	jump24_site, .-jump24_site
+
+	.globl	jump24_target
+	.type	jump24_target,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+jump24_target:
+	bx	lr
+	.size	jump24_target, .-jump24_target
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_MOVW_ABS_NC ext_data
+# CHECK-INSTR: <movw>:
+# CHECK-INSTR: f240 0000     movw    r0, #0x0
+# jitlink-check: decode_operand(movw, 1) = ext_data[15:0]
+	.globl	movw
+	.type	movw,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+movw:
+	movw r0, :lower16:ext_data
+	.size	movw,	.-movw
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_MOVT_ABS ext_data
+# CHECK-INSTR: <movt>:
+# CHECK-INSTR: f2c0 0000     movt    r0, #0x0
+# We decode the operand with index 2, because movt generates one leading implicit
+# predicate operand that we have to skip in order to decode the ext_data operand
+# jitlink-check: decode_operand(movt, 2) = ext_data[31:16]
+	.globl	movt
+	.type	movt,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+movt:
+	movt r0, :upper16:ext_data
+	.size	movt,	.-movt
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_MOVW_PREL_NC ext_func
+# CHECK-INSTR: <movw_prel>:
+# CHECK-INSTR: f240 0000     movw    r0, #0x0
+# jitlink-check: decode_operand(movw_prel, 1) = (ext_func - movw_prel)[15:0]
+  .globl	movw_prel
+  .type	movw_prel,%function
+  .p2align	1
+  .code	16
+  .thumb_func
+movw_prel:
+	movw r0, :lower16:ext_func - .
+	.size	movw_prel, .-movw_prel
+
+# CHECK-TYPE: {{[0-9a-f]+}} R_ARM_THM_MOVT_PREL ext_func
+# CHECK-INSTR: <movt_prel>:
+# CHECK-INSTR: f2c0 0000    movt    r0, #0x0
+# jitlink-check: decode_operand(movt_prel, 2) = (ext_func - movt_prel)[31:16]
+  .globl	movt_prel
+  .type	movt_prel,%function
+  .p2align	1
+  .code	16
+  .thumb_func
+movt_prel:
+	movt r0, :upper16:ext_func - .
+	.size	movt_prel, .-movt_prel
+
+# Empty main function for jitlink to be happy
+	.globl	main
+	.type	main,%function
+	.p2align	1
+	.code	16
+	.thumb_func
+main:
+	bx	lr
+	.size	main,	.-main
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_stubs_arm.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_stubs_arm.s
index fb2e0eb2c0bf249..d3a596c811ec4e3 100644
--- a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_stubs_arm.s
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_stubs_arm.s
@@ -1,10 +1,22 @@
-# RUN: rm -rf %t && mkdir -p %t
+# RUN: rm -rf %t && mkdir -p %t/armv4t && mkdir -p %t/armv6 && mkdir -p %t/armv7
+#
+# RUN: llvm-mc -triple=armv4t-linux-gnueabi -arm-add-build-attributes \
+# RUN:         -filetype=obj -o %t/armv4t/out.o %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 \
+# RUN:              -slab-allocate 10Kb -slab-page-size 4096 \
+# RUN:              -abs ext=0x76bbe880 -check %s %t/armv4t/out.o
+#
+# RUN: llvm-mc -triple=armv6-linux-gnueabi -arm-add-build-attributes \
+# RUN:         -filetype=obj -o %t/armv6/out.o %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 \
+# RUN:              -slab-allocate 10Kb -slab-page-size 4096 \
+# RUN:              -abs ext=0x76bbe880 -check %s %t/armv6/out.o
+#
 # RUN: llvm-mc -triple=armv7-linux-gnueabi -arm-add-build-attributes \
-# RUN:         -filetype=obj -o %t/out.o %s
+# RUN:         -filetype=obj -o %t/armv7/out.o %s
 # RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 \
 # RUN:              -slab-allocate 10Kb -slab-page-size 4096 \
-# RUN:              -abs ext=0x76bbe880 \
-# RUN:              -check %s %t/out.o
+# RUN:              -abs ext=0x76bbe880 -check %s %t/armv7/out.o
 
 	.text
 	.syntax unified
@@ -36,10 +48,10 @@ test_arm_call:
 	pop	{pc}
 	.size	test_arm_call, .-test_arm_call
 
-# This test is executable with both, Arm and Thumb `ext` functions. It only has
-# to return with `bx lr`. For example:
-#   > echo "void ext() {}" | clang -target armv7-linux-gnueabihf -o ext-arm.o -c -xc -
-#   > llvm-jitlink ext-arm.o out.o
+# This test is executable with any Arm (and for v7+ also Thumb) `ext` functions.
+# It only has to return with `bx lr`. For example:
+#   > echo "void ext() {}" | clang -target armv7-linux-gnueabihf -o ext.o -c -xc -
+#   > llvm-jitlink ext.o out.o
 #
 	.globl	main
 	.type	main,%function
@@ -48,6 +60,6 @@ main:
 	push	{lr}
 	bl	test_arm_call
 	bl	test_arm_jump
-	movw	r0, #0
+	mov	r0, #0
 	pop	{pc}
 	.size	main, .-main
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_stubs_thumb.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_stubs_thumb.s
index f6156628ce2a9fe..aa8c917a08809f0 100644
--- a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_stubs_thumb.s
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_stubs_thumb.s
@@ -1,10 +1,17 @@
-# RUN: rm -rf %t && mkdir -p %t
+# RUN: rm -rf %t && mkdir -p %t/thumbv7m && mkdir -p %t/thumbv7
+#
+# RUN: llvm-mc -triple=thumbv7m-linux-gnueabi -arm-add-build-attributes \
+# RUN:         -filetype=obj -o %t/thumbv7m/out.o %s
+# RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 \
+# RUN:              -slab-allocate 10Kb -slab-page-size 4096 \
+# RUN:              -abs ext=0x76bbe880 -check %s %t/thumbv7m/out.o
+#
 # RUN: llvm-mc -triple=thumbv7-linux-gnueabi -arm-add-build-attributes \
-# RUN:         -filetype=obj -o %t/elf_stubs.o %s
+# RUN:         -filetype=obj -o %t/thumbv7/out.o %s
 # RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 \
 # RUN:              -slab-allocate 10Kb -slab-page-size 4096 \
-# RUN:              -abs external_func=0x76bbe880 \
-# RUN:              -check %s %t/elf_stubs.o
+# RUN:              -abs ext=0x76bbe880 -check %s %t/thumbv7/out.o
+
 
 	.text
 	.syntax unified
@@ -14,15 +21,15 @@
 # where the branch-target address is loaded from a GOT entry. Instead, they
 # hard-code it in the immediate field.
 #
-# jitlink-check: decode_operand(test_external_call, 2) = stub_addr(elf_stubs.o, external_func) - next_pc(test_external_call)
-# jitlink-check: decode_operand(test_external_jump, 0) = stub_addr(elf_stubs.o, external_func) - next_pc(test_external_jump)
+# jitlink-check: decode_operand(test_external_call, 2) = stub_addr(out.o, ext) - next_pc(test_external_call)
+# jitlink-check: decode_operand(test_external_jump, 0) = stub_addr(out.o, ext) - next_pc(test_external_jump)
 	.globl  test_external_call
 	.type	test_external_call,%function
 	.p2align	1
 	.code	16
 	.thumb_func
 test_external_call:
-	bl	external_func
+	bl	ext
 	.size test_external_call, .-test_external_call
 
 	.globl  test_external_jump
@@ -31,7 +38,7 @@ test_external_call:
 	.code	16
 	.thumb_func
 test_external_jump:
-	b	external_func
+	b	ext
 	.size test_external_jump, .-test_external_jump
 
 # Empty main function for jitlink to be happy

>From b29100bbb51ca4576f76d70a5d71992a42d2d503 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= <stefan.graenitz at gmail.com>
Date: Tue, 23 Jan 2024 03:03:20 +0100
Subject: [PATCH 3/4] fixup! [JITLink][AArch32] Implement Armv5 ldr-pc stubs
 and use them for all pre-v7 targets

---
 llvm/lib/ExecutionEngine/JITLink/aarch32.cpp | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
index 27f73d3c461cfa9..d1ac86c7281056e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
@@ -738,19 +738,8 @@ const uint8_t Armv5LongLdrPc[] = {
     0x00, 0x00, 0x00, 0x00, // L1: .word S
 };
 
-    // TODO: There is only ARM far stub now. We should add the Thumb stub,
-    // and stubs for branches Thumb - ARM and ARM - Thumb.
-//    writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4]
-
 Symbol &StubsManager_prev7::createEntry(LinkGraph &G, Symbol &Target) {
   Block &B = allocStub(G, getStubsSection(G), Armv5LongLdrPc);
-  //LLVM_DEBUG({
-  //  const char *StubPtr = B.getContent().data();
-  //  HalfWords Reg12 = encodeRegMovtT1MovwT3(12);
-  //  assert(checkRegister<Thumb_MovwAbsNC>(StubPtr, Reg12) &&
-  //         checkRegister<Thumb_MovtAbs>(StubPtr + 4, Reg12) &&
-  //         "Linker generated stubs may only corrupt register r12 (IP)");
-  //});
   B.addEdge(Data_Pointer32, 4, Target, 0);
   return G.addAnonymousSymbol(B, 0, B.getSize(), true, false);
 }
@@ -764,8 +753,8 @@ bool StubsManager_prev7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
   case Arm_Jump24: {
     DEBUG_WITH_TYPE("jitlink", {
       dbgs() << "  Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
-              << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
-              << formatv("{0:x}", E.getOffset()) << ")\n";
+             << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+             << formatv("{0:x}", E.getOffset()) << ")\n";
     });
     E.setTarget(this->getEntryForTarget(G, E.getTarget()));
     return true;

>From 559606e8b3354482e2e1b759a8e212e8899ac3fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= <stefan.graenitz at gmail.com>
Date: Tue, 23 Jan 2024 15:46:22 +0100
Subject: [PATCH 4/4] Add multi-stub support for Armv5 ldr-pc

---
 clang/tools/clang-repl/CMakeLists.txt         |   2 +-
 .../llvm/ExecutionEngine/JITLink/aarch32.h    |  26 +++--
 llvm/lib/ExecutionEngine/JITLink/aarch32.cpp  | 110 +++++++++++-------
 3 files changed, 88 insertions(+), 50 deletions(-)

diff --git a/clang/tools/clang-repl/CMakeLists.txt b/clang/tools/clang-repl/CMakeLists.txt
index 2a0f617a2c0ff6b..031dcaba5e4468f 100644
--- a/clang/tools/clang-repl/CMakeLists.txt
+++ b/clang/tools/clang-repl/CMakeLists.txt
@@ -23,7 +23,7 @@ if(CLANG_PLUGIN_SUPPORT)
   export_executable_symbols_for_plugins(clang-repl)
 endif()
 
-string(TOUPPER ${CMAKE_SYSTEM_PROCESSOR} system_processor)
+string(TOUPPER "${CMAKE_SYSTEM_PROCESSOR}" system_processor)
 if(system_processor MATCHES "ARM")
   set(FLAG_LONG_PLT "-Wl,--long-plt")
   llvm_check_linker_flag(CXX ${FLAG_LONG_PLT} LINKER_HAS_FLAG_LONG_PLT)
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
index 30fb0d2d92aaa44..8f3b5b2e48c011d 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
@@ -340,7 +340,7 @@ class GOTBuilder : public TableManager<GOTBuilder> {
 /// These architectures have no MovT/MovW instructions and don't support Thumb2.
 /// BL is the only Thumb instruction that can generate stubs and they can always
 /// be transformed into BLX.
-class StubsManager_prev7 : public TableManager<StubsManager_prev7> {
+class StubsManager_prev7 {
 public:
   StubsManager_prev7() = default;
 
@@ -352,18 +352,24 @@ class StubsManager_prev7 : public TableManager<StubsManager_prev7> {
   /// Implements link-graph traversal via visitExistingEdges()
   bool visitEdge(LinkGraph &G, Block *B, Edge &E);
 
-  /// Create a Arm stub for pre-v7 CPUs
-  Symbol &createEntry(LinkGraph &G, Symbol &Target);
-
 private:
-  /// Get or create the object file section that will contain all our stubs
-  Section &getStubsSection(LinkGraph &G) {
-    if (!StubsSection)
-      StubsSection = &G.createSection(getSectionName(),
-                                      orc::MemProt::Read | orc::MemProt::Exec);
-    return *StubsSection;
+  // Each stub uses a single block that can have 2 entrypoints, one for Arm and
+  // one for Thumb
+  struct StubMapEntry {
+    Block *B = nullptr;
+    Symbol *ArmEntry = nullptr;
+    Symbol *ThumbEntry = nullptr;
+  };
+
+  std::pair<StubMapEntry *, bool> getStubMapSlot(StringRef Name) {
+    auto &&[Stubs, NewStub] = StubMap.try_emplace(Name);
+    return std::make_pair(&Stubs->second, NewStub);
   }
 
+  Symbol *getOrCreateSlotEntrypoint(LinkGraph &G, StubMapEntry &Slot,
+                                    bool Thumb);
+
+  DenseMap<StringRef, StubMapEntry> StubMap;
   Section *StubsSection = nullptr;
 };
 
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
index d1ac86c7281056e..f143a79f1eeb1a3 100644
--- a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
@@ -725,49 +725,13 @@ bool GOTBuilder::visitEdge(LinkGraph &G, Block *B, Edge &E) {
   return true;
 }
 
-/// Create a new node in the link-graph for the given stub template.
-template <size_t Size>
-static Block &allocStub(LinkGraph &G, Section &S, const uint8_t (&Code)[Size]) {
-  constexpr uint64_t Alignment = 4;
-  ArrayRef<char> Template(reinterpret_cast<const char *>(Code), Size);
-  return G.createContentBlock(S, Template, orc::ExecutorAddr(), Alignment, 0);
-}
-
-const uint8_t Armv5LongLdrPc[] = {
+const uint8_t ArmThumbv5LdrPc[] = {
+    0x78, 0x47,             // bx pc
+    0xfd, 0xe7,             // b #-6 ; Arm recommended sequence to follow bx pc
     0x04, 0xf0, 0x1f, 0xe5, // ldr pc, [pc,#-4] ; L1
     0x00, 0x00, 0x00, 0x00, // L1: .word S
 };
 
-Symbol &StubsManager_prev7::createEntry(LinkGraph &G, Symbol &Target) {
-  Block &B = allocStub(G, getStubsSection(G), Armv5LongLdrPc);
-  B.addEdge(Data_Pointer32, 4, Target, 0);
-  return G.addAnonymousSymbol(B, 0, B.getSize(), true, false);
-}
-
-bool StubsManager_prev7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
-  if (E.getTarget().isDefined())
-    return false;
-
-  switch (E.getKind()) {
-  case Arm_Call:
-  case Arm_Jump24: {
-    DEBUG_WITH_TYPE("jitlink", {
-      dbgs() << "  Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
-             << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
-             << formatv("{0:x}", E.getOffset()) << ")\n";
-    });
-    E.setTarget(this->getEntryForTarget(G, E.getTarget()));
-    return true;
-  }
-  case Thumb_Call:
-  case Thumb_Jump24:
-    // BL is never out-of-range and can always be rewritten to BLX inline.
-    // B can not target an external.
-    break;
-  }
-  return false;
-}
-
 const uint8_t Armv7ABS[] = {
     0x00, 0xc0, 0x00, 0xe3, // movw r12, #0x0000     ; lower 16-bit
     0x00, 0xc0, 0x40, 0xe3, // movt r12, #0x0000     ; upper 16-bit
@@ -780,6 +744,20 @@ const uint8_t Thumbv7ABS[] = {
     0x60, 0x47              // bx   r12
 };
 
+/// Create a new node in the link-graph for the given stub template.
+template <size_t Size>
+static Block &allocStub(LinkGraph &G, Section &S, const uint8_t (&Code)[Size]) {
+  constexpr uint64_t Alignment = 4;
+  ArrayRef<char> Template(reinterpret_cast<const char *>(Code), Size);
+  return G.createContentBlock(S, Template, orc::ExecutorAddr(), Alignment, 0);
+}
+
+static Block &createStubPrev7(LinkGraph &G, Section &S, Symbol &Target) {
+  Block &B = allocStub(G, S, ArmThumbv5LdrPc);
+  B.addEdge(Data_Pointer32, 8, Target, 0);
+  return B;
+}
+
 static Block &createStubThumbv7(LinkGraph &G, Section &S, Symbol &Target) {
   Block &B = allocStub(G, S, Thumbv7ABS);
   B.addEdge(Thumb_MovwAbsNC, 0, Target, 0);
@@ -837,6 +815,60 @@ static bool needsStub(const Edge &E) {
   return false;
 }
 
+// The ArmThumbv5LdrPc stub has 2 entrypoints: Thumb at offset 0 is taken only
+// for Thumb B instructions. Thumb BL is rewritten to BLX and takes the Arm
+// entrypoint at offset 4. Arm branches always use that one.
+Symbol *StubsManager_prev7::getOrCreateSlotEntrypoint(LinkGraph &G,
+                                                      StubMapEntry &Slot,
+                                                      bool Thumb) {
+  constexpr orc::ExecutorAddrDiff ThumbEntrypointOffset = 0;
+  constexpr orc::ExecutorAddrDiff ArmEntrypointOffset = 4;
+  if (Thumb && !Slot.ThumbEntry) {
+    Slot.ThumbEntry =
+        &G.addAnonymousSymbol(*Slot.B, ThumbEntrypointOffset, 4, true, false);
+    Slot.ThumbEntry->setTargetFlags(ThumbSymbol);
+  }
+  if (!Thumb && !Slot.ArmEntry)
+    Slot.ArmEntry =
+        &G.addAnonymousSymbol(*Slot.B, ArmEntrypointOffset, 8, true, false);
+  return Thumb ? Slot.ThumbEntry : Slot.ArmEntry;
+}
+
+bool StubsManager_prev7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
+  if (!needsStub(E))
+    return false;
+
+  Symbol &Target = E.getTarget();
+  assert(Target.hasName() && "Edge cannot point to anonymous target");
+  auto [Slot, NewStub] = getStubMapSlot(Target.getName());
+
+  if (NewStub) {
+    if (!StubsSection)
+      StubsSection = &G.createSection(getSectionName(),
+                                      orc::MemProt::Read | orc::MemProt::Exec);
+    LLVM_DEBUG({
+      dbgs() << "    Created stub entry for " << Target.getName() << " in "
+             << StubsSection->getName() << "\n";
+    });
+    Slot->B = &createStubPrev7(G, *StubsSection, Target);
+  }
+
+  // The ArmThumbv5LdrPc stub has 2 entrypoints: Thumb at offset 0 is taken only
+  // for Thumb B instructions. Thumb BL is rewritten to BLX and takes the Arm
+  // entrypoint at offset 4. Arm branches always use that one.
+  bool UseThumb = E.getKind() == Thumb_Jump24;
+  Symbol *StubEntrypoint = getOrCreateSlotEntrypoint(G, *Slot, UseThumb);
+
+  LLVM_DEBUG({
+    dbgs() << "    Using " << (UseThumb ? "Thumb" : "Arm") << " entrypoint "
+           << *StubEntrypoint << " in "
+           << StubEntrypoint->getBlock().getSection().getName() << "\n";
+  });
+
+  E.setTarget(*StubEntrypoint);
+  return true;
+}
+
 bool StubsManager_v7::visitEdge(LinkGraph &G, Block *B, Edge &E) {
   if (!needsStub(E))
     return false;



More information about the cfe-commits mailing list