[llvm] a8cd35c - [LowerTypeTests] Support generating Armv6-M jump tables. (reland)

Mon Feb 20 02:47:40 PST 2023

Author: Simon Tatham
Date: 2023-02-20T10:46:47Z
New Revision: a8cd35c3b7d56608fe7799fa203ab86bdf0315ca

URL: https://github.com/llvm/llvm-project/commit/a8cd35c3b7d56608fe7799fa203ab86bdf0315ca
DIFF: https://github.com/llvm/llvm-project/commit/a8cd35c3b7d56608fe7799fa203ab86bdf0315ca.diff

LOG: [LowerTypeTests] Support generating Armv6-M jump tables. (reland)

[Originally committed as f6ddf7781471b71243fa3c3ae7c93073f95c7dff;
reverted in bbef38352fbade9e014ec97d5991da5dee306da7 due to test
breakage; now relanded with the Arm tests conditioned on
`arm-registered-target`]

The LowerTypeTests pass emits a jump table in the form of an
`inlineasm` IR node containing a string representation of some
assembly. It tests the target triple to see what architecture it
should be generating assembly for. But that's not good enough for
`Triple::thumb`, because the 32-bit PC-relative `b.w` branch
instruction isn't available in all supported architecture versions. In
particular, Armv6-M doesn't support that instruction (although the
similar Armv8-M Baseline does).

Most of this patch is concerned with working out whether the
compilation target is Armv6-M or not, which I'm doing by going through
all the functions in the module, retrieving a TargetTransformInfo for
each one, and querying it via a new method I've added to check its
SubtargetInfo. If any function's TTI indicates that it's targeting an
architecture supporting B.W, then we assume we're also allowed to use
B.W in the jump table.

The Armv6-M compatible jump table format requires a temporary
register, and therefore also has to use the stack in order to restore
that register.

Another consequence of this change is that jump tables on Arm/Thumb
are no longer always the same size. In particular, on an architecture
that supports Arm and Thumb-1 but not Thumb-2, the Arm and Thumb
tables are different sizes from each other. As a consequence,
``getJumpTableEntrySize`` can no longer base its answer on the target
triple's architecture: it has to take into account the decision that
``selectJumpTableArmEncoding`` made, which meant I had to move that
function to an earlier point in the code and store its answer in the
``LowerTypeTestsModule`` class.

Reviewed By: lenary

Differential Revision: https://reviews.llvm.org/D143576

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/lib/Target/ARM/ARMTargetTransformInfo.h
    llvm/lib/Transforms/IPO/LowerTypeTests.cpp
    llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll
    llvm/test/Transforms/LowerTypeTests/function.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 68889bb782334..c81ac7e1f97c2 100644

--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1570,6 +1570,17 @@ class TargetTransformInfo {
   VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
   /// @}
 
+  /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
+  /// state.
+  ///
+  /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
+  /// node containing a jump table in a format suitable for the target, so it
+  /// needs to know what format of jump table it can legally use.
+  ///
+  /// For non-Arm targets, this function isn't used. It defaults to returning
+  /// false, but it shouldn't matter what it returns anyway.
+  bool hasArmWideBranch(bool Thumb) const;
+
   /// @}
 
 private:
@@ -1927,6 +1938,7 @@ class TargetTransformInfo::Concept {
                                      Align Alignment) const = 0;
   virtual VPLegalization
   getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
+  virtual bool hasArmWideBranch(bool Thumb) const = 0;
 };
 
 template <typename T>
@@ -2606,6 +2618,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
     return Impl.getVPLegalizationStrategy(PI);
   }
+
+  bool hasArmWideBranch(bool Thumb) const override {
+    return Impl.hasArmWideBranch(Thumb);
+  }
 };
 
 template <typename T>

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9b19f3a506a05..78d66c269befa 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -862,6 +862,8 @@ class TargetTransformInfoImplBase {
         /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
   }
 
+  bool hasArmWideBranch(bool) const { return false; }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index e9c01e68fde2c..4ad5d2dc2434a 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1170,6 +1170,10 @@ TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
   return TTIImpl->getVPLegalizationStrategy(VPI);
 }
 
+bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
+  return TTIImpl->hasArmWideBranch(Thumb);
+}
+
 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
   return TTIImpl->shouldExpandReduction(II);
 }

diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 0467cc7373137..d1060f665ccde 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2441,3 +2441,16 @@ InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
   }
   return -1;
 }
+
+bool ARMTTIImpl::hasArmWideBranch(bool Thumb) const {
+  if (Thumb) {
+    // B.W is available in any Thumb2-supporting target, and also in every
+    // version of Armv8-M, even Baseline which does not include the rest of
+    // Thumb2.
+    return ST->isThumb2() || ST->hasV8MBaselineOps();
+  } else {
+    // B is available in all versions of the Arm ISA, so the only question is
+    // whether that ISA is available at all.
+    return ST->hasARMOps();
+  }
+}

diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index ede400f9ecbc9..d75879b867df6 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -325,6 +325,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
 
     return true;
   }
+
+  bool hasArmWideBranch(bool Thumb) const;
+
   /// @}
 };
 

diff  --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 989c6a00db9fb..9ea05f975f4f9 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
@@ -406,6 +407,15 @@ class LowerTypeTestsModule {
   Triple::OSType OS;
   Triple::ObjectFormatType ObjectFormat;
 
+  // Determines which kind of Thumb jump table we generate. If arch is
+  // either 'arm' or 'thumb' we need to find this out, because
+  // selectJumpTableArmEncoding may decide to use Thumb in either case.
+  bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;
+
+  // The jump table type we ended up deciding on. (Usually the same as
+  // Arch, except that 'arm' and 'thumb' are often interchangeable.)
+  Triple::ArchType JumpTableArch = Triple::UnknownArch;
+
   IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
   IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
   PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
@@ -481,6 +491,8 @@ class LowerTypeTestsModule {
 
   void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
                                        ArrayRef<GlobalTypeMember *> Globals);
+  Triple::ArchType
+  selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
   unsigned getJumpTableEntrySize();
   Type *getJumpTableEntryType();
   void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
@@ -518,7 +530,8 @@ class LowerTypeTestsModule {
   void replaceDirectCalls(Value *Old, Value *New);
 
 public:
-  LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
+  LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
+                       ModuleSummaryIndex *ExportSummary,
                        const ModuleSummaryIndex *ImportSummary,
                        bool DropTypeTests);
 
@@ -526,7 +539,7 @@ class LowerTypeTestsModule {
 
   // Lower the module using the action and summary passed as command line
   // arguments. For testing purposes only.
-  static bool runForTesting(Module &M);
+  static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
 };
 } // end anonymous namespace
 
@@ -1182,31 +1195,36 @@ static const unsigned kX86JumpTableEntrySize = 8;
 static const unsigned kX86IBTJumpTableEntrySize = 16;
 static const unsigned kARMJumpTableEntrySize = 4;
 static const unsigned kARMBTIJumpTableEntrySize = 8;
+static const unsigned kARMv6MJumpTableEntrySize = 16;
 static const unsigned kRISCVJumpTableEntrySize = 8;
 
 unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
-  switch (Arch) {
-    case Triple::x86:
-    case Triple::x86_64:
-      if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
+  switch (JumpTableArch) {
+  case Triple::x86:
+  case Triple::x86_64:
+    if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
             M.getModuleFlag("cf-protection-branch")))
-        if (MD->getZExtValue())
-          return kX86IBTJumpTableEntrySize;
-      return kX86JumpTableEntrySize;
-    case Triple::arm:
-    case Triple::thumb:
+      if (MD->getZExtValue())
+        return kX86IBTJumpTableEntrySize;
+    return kX86JumpTableEntrySize;
+  case Triple::arm:
+    return kARMJumpTableEntrySize;
+  case Triple::thumb:
+    if (CanUseThumbBWJumpTable)
       return kARMJumpTableEntrySize;
-    case Triple::aarch64:
-      if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+    else
+      return kARMv6MJumpTableEntrySize;
+  case Triple::aarch64:
+    if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
             M.getModuleFlag("branch-target-enforcement")))
-        if (BTE->getZExtValue())
-          return kARMBTIJumpTableEntrySize;
-      return kARMJumpTableEntrySize;
-    case Triple::riscv32:
-    case Triple::riscv64:
-      return kRISCVJumpTableEntrySize;
-    default:
-      report_fatal_error("Unsupported architecture for jump tables");
+      if (BTE->getZExtValue())
+        return kARMBTIJumpTableEntrySize;
+    return kARMJumpTableEntrySize;
+  case Triple::riscv32:
+  case Triple::riscv64:
+    return kRISCVJumpTableEntrySize;
+  default:
+    report_fatal_error("Unsupported architecture for jump tables");
   }
 }
 
@@ -1240,7 +1258,32 @@ void LowerTypeTestsModule::createJumpTableEntry(
         AsmOS << "bti c\n";
     AsmOS << "b $" << ArgIndex << "\n";
   } else if (JumpTableArch == Triple::thumb) {
-    AsmOS << "b.w $" << ArgIndex << "\n";
+    if (!CanUseThumbBWJumpTable) {
+      // In Armv6-M, this sequence will generate a branch without corrupting
+      // any registers. We use two stack words; in the second, we construct the
+      // address we'll pop into pc, and the first is used to save and restore
+      // r0 which we use as a temporary register.
+      //
+      // To support position-independent use cases, the offset of the target
+      // function is stored as a relative offset (which will expand into an
+      // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other
+      // object file types), and added to pc after we load it. (The alternative
+      // B.W is automatically pc-relative.)
+      //
+      // There are five 16-bit Thumb instructions here, so the .balign 4 adds a
+      // sixth halfword of padding, and then the offset consumes a further 4
+      // bytes, for a total of 16, which is very convenient since entries in
+      // this jump table need to have power-of-two size.
+      AsmOS << "push {r0,r1}\n"
+            << "ldr r0, 1f\n"
+            << "0: add r0, r0, pc\n"
+            << "str r0, [sp, #4]\n"
+            << "pop {r0,pc}\n"
+            << ".balign 4\n"
+            << "1: .word $" << ArgIndex << " - (0b + 4)\n";
+    } else {
+      AsmOS << "b.w $" << ArgIndex << "\n";
+    }
   } else if (JumpTableArch == Triple::riscv32 ||
              JumpTableArch == Triple::riscv64) {
     AsmOS << "tail $" << ArgIndex << "@plt\n";
@@ -1352,12 +1395,19 @@ static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
 // Each jump table must be either ARM or Thumb as a whole for the bit-test math
 // to work. Pick one that matches the majority of members to minimize interop
 // veneers inserted by the linker.
-static Triple::ArchType
-selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
-                           Triple::ArchType ModuleArch) {
-  if (ModuleArch != Triple::arm && ModuleArch != Triple::thumb)
-    return ModuleArch;
+Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding(
+    ArrayRef<GlobalTypeMember *> Functions) {
+  if (Arch != Triple::arm && Arch != Triple::thumb)
+    return Arch;
+
+  if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) {
+    // In architectures that provide Arm and Thumb-1 but not Thumb-2,
+    // we should always prefer the Arm jump table format, because the
+    // Thumb-1 one is larger and slower.
+    return Triple::arm;
+  }
 
+  // Otherwise, go with majority vote.
   unsigned ArmCount = 0, ThumbCount = 0;
   for (const auto GTM : Functions) {
     if (!GTM->isJumpTableCanonical()) {
@@ -1368,7 +1418,7 @@ selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
     }
 
     Function *F = cast<Function>(GTM->getGlobal());
-    ++(isThumbFunction(F, ModuleArch) ? ThumbCount : ArmCount);
+    ++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount);
   }
 
   return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
@@ -1381,8 +1431,6 @@ void LowerTypeTestsModule::createJumpTable(
   SmallVector<Value *, 16> AsmArgs;
   AsmArgs.reserve(Functions.size() * 2);
 
-  Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions, Arch);
-
   for (GlobalTypeMember *GTM : Functions)
     createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
                          cast<Function>(GTM->getGlobal()));
@@ -1399,9 +1447,11 @@ void LowerTypeTestsModule::createJumpTable(
     F->addFnAttr("target-features", "-thumb-mode");
   if (JumpTableArch == Triple::thumb) {
     F->addFnAttr("target-features", "+thumb-mode");
-    // Thumb jump table assembly needs Thumb2. The following attribute is added
-    // by Clang for -march=armv7.
-    F->addFnAttr("target-cpu", "cortex-a8");
+    if (CanUseThumbBWJumpTable) {
+      // Thumb jump table assembly needs Thumb2. The following attribute is
+      // added by Clang for -march=armv7.
+      F->addFnAttr("target-cpu", "cortex-a8");
+    }
   }
   // When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI
   // for the function to avoid double BTI. This is a no-op without
@@ -1521,6 +1571,10 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
   // FIXME: find a better way to represent the jumptable in the IR.
   assert(!Functions.empty());
 
+  // Decide on the jump table encoding, so that we know how big the
+  // entries will be.
+  JumpTableArch = selectJumpTableArmEncoding(Functions);
+
   // Build a simple layout based on the regular layout of jump tables.
   DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
   unsigned EntrySize = getJumpTableEntrySize();
@@ -1706,18 +1760,31 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
 
 /// Lower all type tests in this module.
 LowerTypeTestsModule::LowerTypeTestsModule(
-    Module &M, ModuleSummaryIndex *ExportSummary,
+    Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
     const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
     : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
       DropTypeTests(DropTypeTests || ClDropTypeTests) {
   assert(!(ExportSummary && ImportSummary));
   Triple TargetTriple(M.getTargetTriple());
   Arch = TargetTriple.getArch();
+  if (Arch == Triple::arm)
+    CanUseArmJumpTable = true;
+  if (Arch == Triple::arm || Arch == Triple::thumb) {
+    auto &FAM =
+        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+    for (Function &F : M) {
+      auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+      if (TTI.hasArmWideBranch(false))
+        CanUseArmJumpTable = true;
+      if (TTI.hasArmWideBranch(true))
+        CanUseThumbBWJumpTable = true;
+    }
+  }
   OS = TargetTriple.getOS();
   ObjectFormat = TargetTriple.getObjectFormat();
 }
 
-bool LowerTypeTestsModule::runForTesting(Module &M) {
+bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
   ModuleSummaryIndex Summary(/*HaveGVs=*/false);
 
   // Handle the command-line summary arguments. This code is for testing
@@ -1735,7 +1802,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
 
   bool Changed =
       LowerTypeTestsModule(
-          M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+          M, AM,
+          ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
           ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
           /*DropTypeTests*/ false)
           .lower();
@@ -2298,10 +2366,10 @@ PreservedAnalyses LowerTypeTestsPass::run(Module &M,
                                           ModuleAnalysisManager &AM) {
   bool Changed;
   if (UseCommandLine)
-    Changed = LowerTypeTestsModule::runForTesting(M);
+    Changed = LowerTypeTestsModule::runForTesting(M, AM);
   else
     Changed =
-        LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
+        LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
             .lower();
   if (!Changed)
     return PreservedAnalyses::all();

diff  --git a/llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll b/llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll
index 32f91b1481194..62a9e5e012ab7 100644
--- a/llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll
+++ b/llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll
@@ -1,8 +1,10 @@
+; REQUIRES: arm-registered-target
+
 ; RUN: opt -S -mtriple=arm-unknown-linux-gnu -passes=lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t %s | FileCheck %s
 
 target datalayout = "e-p:64:64"
 
-define void @f1() "target-features"="+thumb-mode" !type !0 {
+define void @f1() "target-features"="+thumb-mode,+v6t2" !type !0 {
   ret void
 }
 

diff  --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll
index b5611d94de57c..968c9d434eb2f 100644
--- a/llvm/test/Transforms/LowerTypeTests/function.ll
+++ b/llvm/test/Transforms/LowerTypeTests/function.ll
@@ -2,13 +2,20 @@
 ; RUN: opt -S -passes=lowertypetests -mtriple=x86_64-unknown-linux-gnu %s | FileCheck --check-prefixes=X86,X86-LINUX,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=i686-pc-win32 %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=x86_64-pc-win32 %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE %s
-; RUN: opt -S -passes=lowertypetests -mtriple=arm-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s
-; RUN: opt -S -passes=lowertypetests -mtriple=thumb-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE %s
-; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s
 
+; The right format for Arm jump tables depends on the selected
+; subtarget, so we can't get these tests right without the Arm target
+; compiled in.
+; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=arm-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s %}
+; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv7m-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE %s %}
+; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv8m.base-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE %s %}
+; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv6m-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMBV6M,NATIVE %s %}
+; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=thumbv5-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s %}
+; RUN: %if arm-registered-target %{ opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s %}
+
 ; Tests that we correctly handle bitsets containing 2 or more functions.
 
 target datalayout = "e-p:64:64"
@@ -25,6 +32,7 @@ target datalayout = "e-p:64:64"
 ; X86: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1)
 ; ARM: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1)
 ; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1)
+; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1)
 ; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1)
 
 ; NATIVE: define hidden void @f.cfi()
@@ -53,9 +61,10 @@ define i1 @foo(ptr %p) {
 
 ; X86-LINUX:   define private void @[[JT]]() #[[ATTR:.*]] align 8 {
 ; X86-WIN32:   define private void @[[JT]]() #[[ATTR:.*]] align 8 {
-; ARM:   define private void @[[JT]]() #[[ATTR:.*]] align 4 {
-; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
-; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
+; ARM:         define private void @[[JT]]() #[[ATTR:.*]] align 4 {
+; THUMB:       define private void @[[JT]]() #[[ATTR:.*]] align 4 {
+; THUMBV6M:    define private void @[[JT]]() #[[ATTR:.*]] align 16 {
+; RISCV:       define private void @[[JT]]() #[[ATTR:.*]] align 8 {
 
 ; X86:      jmp ${0:c}@plt
 ; X86-SAME: int3
@@ -72,6 +81,21 @@ define i1 @foo(ptr %p) {
 ; THUMB:      b.w $0
 ; THUMB-SAME: b.w $1
 
+; THUMBV6M:      push {r0,r1}
+; THUMBV6M-SAME: ldr r0, 1f
+; THUMBV6M-SAME: 0: add r0, r0, pc
+; THUMBV6M-SAME: str r0, [sp, #4]
+; THUMBV6M-SAME: pop {r0,pc}
+; THUMBV6M-SAME: .balign 4
+; THUMBV6M-SAME: 1: .word $0 - (0b + 4)
+; THUMBV6M-SAME: push {r0,r1}
+; THUMBV6M-SAME: ldr r0, 1f
+; THUMBV6M-SAME: 0: add r0, r0, pc
+; THUMBV6M-SAME: str r0, [sp, #4]
+; THUMBV6M-SAME: pop {r0,pc}
+; THUMBV6M-SAME: .balign 4
+; THUMBV6M-SAME: 1: .word $1 - (0b + 4)
+
 ; RISCV:      tail $0@plt
 ; RISCV-SAME: tail $1@plt
 
@@ -81,6 +105,7 @@ define i1 @foo(ptr %p) {
 ; X86-WIN32: attributes #[[ATTR]] = { nocf_check nounwind }
 ; ARM: attributes #[[ATTR]] = { naked nounwind
 ; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" }
+; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" }
 ; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" }
 
 ; WASM32: ![[I0]] = !{i64 1}