[llvm] [llvm] Improve the RelLookupTableConverter pass (PR #93355)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 24 16:28:53 PDT 2024


https://github.com/PiJoules created https://github.com/llvm/llvm-project/pull/93355

This adds some improvements to the RelLookupTableConverter pass:

- Also operate on constant structs
- Operate on globals with more than one use
- Also support GEPs that use integral SourceElementTypes

This covers a lot more globals and shows ~31kB of reclaimed relro in chromium on fuchsia.

```
bloaty after/exe.unstripped/web_engine_exe -- before/exe.unstripped/web_engine_exe
    FILE SIZE        VM SIZE
 --------------  --------------
  +100% +16.9Ki  +100% +16.9Ki    .rodata
  +100%    +672  +100%    +672    .text
  +100%     +23  [ = ]       0    .debug_line_str
  +100%     +13  [ = ]       0    .debug_str
  +101%      +4  [ = ]       0    .comment
  [ = ]       0   +89%    -176    .relro_padding
   +98%    -512   +98%    -512    .relr.dyn
   +81% -1.19Ki  [ = ]       0    [Unmapped]
  +100% -1.71Ki  [ = ]       0    .strtab
   +98% -31.8Ki   +98% -31.8Ki    .data.rel.ro
  +100%  +346Mi  +100%  +176Mi    TOTAL
```

This still has the same functionality as before where we only touch globals used via GEPs and those GEPs can only be used via loads. This is still very conservative, but it's safe and we can loosen this later. This patch also makes sure we re-use the original name of the global rather than making a new name.

>From 5db4041ec8db59b6bade5a9727b3949a6b244084 Mon Sep 17 00:00:00 2001
From: Leonard Chan <leonardchan at google.com>
Date: Fri, 24 May 2024 15:25:03 -0700
Subject: [PATCH] [llvm] Improve the RelLookupTableConverter pass

This adds some improvements to the RelLookupTableConverter pass:

- Also operate on constant structs
- Operate on globals with more than one use
- Also support GEPs that use integral SourceElementTypes

This covers a lot more globals and shows ~31kB of reclaimed relro in
chromium on fuchsia.

```
bloaty after/exe.unstripped/web_engine_exe -- before/exe.unstripped/web_engine_exe
    FILE SIZE        VM SIZE
 --------------  --------------
  +100% +16.9Ki  +100% +16.9Ki    .rodata
  +100%    +672  +100%    +672    .text
  +100%     +23  [ = ]       0    .debug_line_str
  +100%     +13  [ = ]       0    .debug_str
  +101%      +4  [ = ]       0    .comment
  [ = ]       0   +89%    -176    .relro_padding
   +98%    -512   +98%    -512    .relr.dyn
   +81% -1.19Ki  [ = ]       0    [Unmapped]
  +100% -1.71Ki  [ = ]       0    .strtab
   +98% -31.8Ki   +98% -31.8Ki    .data.rel.ro
  +100%  +346Mi  +100%  +176Mi    TOTAL
```

This still has the same functionality as before where we only touch
globals used via GEPs and those GEPs can only be used via loads. This is
still very conservative, but it's safe and we can loosen this later.
This patch also makes sure we re-use the original name of the global
rather than making a new name.
---
 .../Utils/RelLookupTableConverter.cpp         | 337 ++++++++++++------
 .../X86/no_relative_lookup_table.ll           |  57 +++
 .../RelLookupTableConverter/X86/opaque-ptr.ll |   2 +-
 .../X86/relative_lookup_table.ll              | 156 ++++++--
 4 files changed, 415 insertions(+), 137 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index ea628d7c3d7d6..b55e5aeb76adb 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -13,60 +13,121 @@
 
 #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/SimplifyQuery.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 
+#define DEBUG_TYPE "rellookuptable"
+
 using namespace llvm;
 
+static bool isValidGEP(const GlobalVariable *GV, const GetElementPtrInst *GEP) {
+  if (GEP->getOperand(0) != GV)
+    return false;
+
+  if (GEP->getNumOperands() == 3) {
+    // Match against a GEP with 3 operands representing
+    //
+    //   1. The global itself
+    //   2. The first index (which should be zero)
+    //   3. The actual offset from the start of the global.
+    //
+    // The GEP should look something like this:
+    //
+    //   getelementptr [4 x ptr], ptr @glob, i32 0, i32 %idx
+    //
+    const auto *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    if (!Idx || !Idx->isZero())
+      return false;
+
+    if (GV->getValueType() != GEP->getSourceElementType())
+      return false;
+
+  } else if (GEP->getNumOperands() == 2) {
+    // Match against a GEP with an integral source element type and 2 operands
+    // representing:
+    //
+    //   1. The global itself
+    //   2. The first index
+    //
+    // The GEP should look something like this:
+    //
+    //   getelementptr i8, ptr @glob, i32 %idx
+    //
+    // Here we take the byte offset from the start of the global.
+    if (!GEP->getSourceElementType()->isIntegerTy())
+      return false;
+
+  } else {
+    // Don't accept any other GEPs.
+    return false;
+  }
+
+  // Conservatively only accept GEPs that are used by loads. This is strict,
+  // but ensures the global never escapes the module so we can see all uses
+  // of it.
+  for (const User *U : GEP->users()) {
+    auto *Load = dyn_cast<LoadInst>(U);
+    if (!Load)
+      return false;
+
+    if (!Load->getType()->isPointerTy())
+      return false;
+  }
+
+  return true;
+}
+
 static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
-  // If lookup table has more than one user,
-  // do not generate a relative lookup table.
-  // This is to simplify the analysis that needs to be done for this pass.
-  // TODO: Add support for lookup tables with multiple uses.
-  // For ex, this can happen when a function that uses a lookup table gets
-  // inlined into multiple call sites.
-  if (!GV.hasInitializer() ||
-      !GV.isConstant() ||
-      !GV.hasOneUse())
+  // The global should look something like this:
+  //
+  //   @symbols = dso_local constant [3 x ptr] [ptr @.str, ptr @.str.1, ptr @.str.2]
+  //
+  // This definition must be the one we know will persist at link/runtime.
+  if (!GV.hasExactDefinition())
     return false;
 
-  GetElementPtrInst *GEP =
-      dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser());
-  if (!GEP || !GEP->hasOneUse() ||
-      GV.getValueType() != GEP->getSourceElementType())
+  // We must never be able to mutate this global.
+  if (!GV.isConstant())
     return false;
 
-  LoadInst *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser());
-  if (!Load || !Load->hasOneUse() ||
-      Load->getType() != GEP->getResultElementType())
+  // The global must be local to the TU. We need this because it guarantees this
+  // global can't be directly referenced outside the TU. It's important that we
+  // see all uses of this global to ensure we can adjust every instances of how
+  // it's accessed.
+  if (!GV.hasLocalLinkage())
     return false;
 
-  // If the original lookup table does not have local linkage and is
-  // not dso_local, do not generate a relative lookup table.
-  // This optimization creates a relative lookup table that consists of
-  // offsets between the start of the lookup table and its elements.
-  // To be able to generate these offsets, relative lookup table and
-  // its elements should have internal linkage and be dso_local, which means
-  // that they should resolve to symbols within the same linkage unit.
-  if (!GV.hasLocalLinkage() ||
-      !GV.isDSOLocal() ||
-      !GV.isImplicitDSOLocal())
+  // Definitely don't operate on stuff like llvm.compiler.used.
+  if (GV.getName().starts_with("llvm."))
     return false;
 
-  ConstantArray *Array = dyn_cast<ConstantArray>(GV.getInitializer());
-  if (!Array)
+  // Operate only on struct or array types.
+  //
+  // TODO: Technically this can also work for a GlobalVariable whose initializer
+  // is another GlobalVariable. This would save one reloc for that case.
+  const Constant *Initializer = GV.getInitializer();
+  if (!Initializer->getType()->isAggregateType())
     return false;
 
+  // Ensure the only users of this global are valid GEPs.
+  for (const User *U : GV.users()) {
+    const auto *GEP = dyn_cast<GetElementPtrInst>(U);
+    if (!GEP)
+      return false;
+
+    if (!isValidGEP(&GV, GEP))
+      return false;
+  }
+
   // If values are not 64-bit pointers, do not generate a relative lookup table.
   const DataLayout &DL = M.getDataLayout();
-  Type *ElemType = Array->getType()->getElementType();
-  if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64)
-    return false;
 
-  for (const Use &Op : Array->operands()) {
+  for (const Use &Op : Initializer->operands()) {
     Constant *ConstOp = cast<Constant>(&Op);
     GlobalValue *GVOp;
     APInt Offset;
@@ -81,89 +142,156 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
     if (!GlovalVarOp || !GlovalVarOp->isConstant())
       return false;
 
-    if (!GlovalVarOp->hasLocalLinkage() ||
-        !GlovalVarOp->isDSOLocal() ||
-        !GlovalVarOp->isImplicitDSOLocal())
+    bool DSOLocal = GVOp->isDSOLocal() || GVOp->isImplicitDSOLocal();
+    if (!DSOLocal)
+      return false;
+
+    Type *ElemType = Op->getType();
+    if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64)
       return false;
   }
 
   return true;
 }
 
-static GlobalVariable *createRelLookupTable(Function &Func,
-                                            GlobalVariable &LookupTable) {
-  Module &M = *Func.getParent();
-  ConstantArray *LookupTableArr =
-      cast<ConstantArray>(LookupTable.getInitializer());
-  unsigned NumElts = LookupTableArr->getType()->getNumElements();
-  ArrayType *IntArrayTy =
-      ArrayType::get(Type::getInt32Ty(M.getContext()), NumElts);
-
-  GlobalVariable *RelLookupTable = new GlobalVariable(
-    M, IntArrayTy, LookupTable.isConstant(), LookupTable.getLinkage(),
-    nullptr, "reltable." + Func.getName(), &LookupTable,
-    LookupTable.getThreadLocalMode(), LookupTable.getAddressSpace(),
-    LookupTable.isExternallyInitialized());
-
-  uint64_t Idx = 0;
-  SmallVector<Constant *, 64> RelLookupTableContents(NumElts);
-
-  for (Use &Operand : LookupTableArr->operands()) {
-    Constant *Element = cast<Constant>(Operand);
+static GlobalVariable *createRelLookupTable(Module &M, GlobalVariable &GV) {
+  const Constant *Initializer = GV.getInitializer();
+  size_t NumOperands = Initializer->getNumOperands();
+  Type *OffsetTy = Type::getInt32Ty(M.getContext());
+
+  Type *ReplacementTy = [&]() -> Type *{
+    if (Initializer->getType()->isStructTy()) {
+      SmallVector<Type *, 8> ElemTys(NumOperands, OffsetTy);
+      return llvm::StructType::create(ElemTys);
+    } else if (Initializer->getType()->isArrayTy()) {
+      return llvm::ArrayType::get(OffsetTy, NumOperands);
+    }
+    llvm_unreachable("An aggregate type should be one of a struct or array");
+  }();
+
+  GlobalVariable *Replacement = new GlobalVariable(M, ReplacementTy, /*isConstant=*/true,
+      GV.getLinkage(), /*Initializer=*/nullptr);
+  Replacement->takeName(&GV); // Take over the old global's name
+  Replacement->setUnnamedAddr(GV.getUnnamedAddr());
+  Replacement->setVisibility(GV.getVisibility());
+  Replacement->setAlignment(llvm::Align(4));  // Unconditional 4-byte alignment
+
+  SmallVector<Constant *, 8> members(NumOperands);
+  for (size_t i = 0; i < NumOperands; ++i) {
+    Constant *OriginalMember = cast<Constant>(Initializer->getOperand(i));
+
+    // Take the offset.
     Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
-    Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy);
-    Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy);
+    Constant *Base = llvm::ConstantExpr::getPtrToInt(Replacement, IntPtrTy);
+    Constant *Target =
+        llvm::ConstantExpr::getPtrToInt(OriginalMember, IntPtrTy);
     Constant *Sub = llvm::ConstantExpr::getSub(Target, Base);
-    Constant *RelOffset =
-        llvm::ConstantExpr::getTrunc(Sub, Type::getInt32Ty(M.getContext()));
-    RelLookupTableContents[Idx++] = RelOffset;
+    Constant *RelOffset = llvm::ConstantExpr::getTrunc(Sub, OffsetTy);
+
+    members[i] = RelOffset;
   }
 
-  Constant *Initializer =
-      ConstantArray::get(IntArrayTy, RelLookupTableContents);
-  RelLookupTable->setInitializer(Initializer);
-  RelLookupTable->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
-  RelLookupTable->setAlignment(llvm::Align(4));
-  return RelLookupTable;
+  Constant *ReplacementInit = [&]() -> Constant * {
+    // TODO: Is there any value in keeping this as a struct still since all elements will be the same type?
+    if (Initializer->getType()->isStructTy())
+      return llvm::ConstantStruct::get(cast<StructType>(ReplacementTy), members);
+    else if (Initializer->getType()->isArrayTy())
+      return llvm::ConstantArray::get(cast<ArrayType>(ReplacementTy), members);
+    llvm_unreachable("An aggregate type should be one of a struct or array");
+  }();
+
+  Replacement->setInitializer(ReplacementInit);
+  return Replacement;
 }
 
-static void convertToRelLookupTable(GlobalVariable &LookupTable) {
-  GetElementPtrInst *GEP =
-      cast<GetElementPtrInst>(LookupTable.use_begin()->getUser());
-  LoadInst *Load = cast<LoadInst>(GEP->use_begin()->getUser());
-
-  Module &M = *LookupTable.getParent();
-  BasicBlock *BB = GEP->getParent();
-  IRBuilder<> Builder(BB);
-  Function &Func = *BB->getParent();
-
-  // Generate an array that consists of relative offsets.
-  GlobalVariable *RelLookupTable = createRelLookupTable(Func, LookupTable);
-
-  // Place new instruction sequence before GEP.
-  Builder.SetInsertPoint(GEP);
-  Value *Index = GEP->getOperand(2);
-  IntegerType *IntTy = cast<IntegerType>(Index->getType());
-  Value *Offset =
-      Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift");
-
-  // Insert the call to load.relative intrinsic before LOAD.
-  // GEP might not be immediately followed by a LOAD, like it can be hoisted
-  // outside the loop or another instruction might be inserted them in between.
-  Builder.SetInsertPoint(Load);
-  Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
-      &M, Intrinsic::load_relative, {Index->getType()});
-
-  // Create a call to load.relative intrinsic that computes the target address
-  // by adding base address (lookup table address) and relative offset.
-  Value *Result = Builder.CreateCall(LoadRelIntrinsic, {RelLookupTable, Offset},
-                                     "reltable.intrinsic");
-
-  // Replace load instruction with the new generated instruction sequence.
-  Load->replaceAllUsesWith(Result);
-  // Remove Load and GEP instructions.
-  Load->eraseFromParent();
-  GEP->eraseFromParent();
+static void convertToRelLookupTable(GlobalVariable &GV) {
+  Module &M = *GV.getParent();
+
+  // Generate a global that consists of relative offsets.
+  GlobalVariable *Replacement = createRelLookupTable(M, GV);
+
+  // Save these loads and GEPs to erase from their parents after we iterate
+  // through the users.
+  SmallVector<Instruction *, 16> ToRemove;
+
+  // Right now, we only account for GEPs that are themselves only used by loads.
+  for (User *user : GV.users()) {
+    // We assert in an earlier check that all uses of this global must be GEPs.
+    auto *GEP = cast<GetElementPtrInst>(user);
+    for (User *user : GEP->users()) {
+      // We assert in an earlier check that all uses of this GEP must be loads.
+      auto *Load = cast<LoadInst>(user);
+      assert(GEP->getOperand(0) == &GV &&
+             "The first GEP operand should always be the global");
+
+      // Place new instruction sequence before GEP.
+      BasicBlock *BB = GEP->getParent();
+      IRBuilder<> Builder(BB);
+      Builder.SetInsertPoint(GEP);
+
+      // 1. The global itself
+      // 2. The first index (which should be zero)
+      // 3. The actual offset from the start of the global.
+      Value *Offset = [&]() -> Value * {
+        if (GEP->getNumOperands() == 3) {
+          // Convert to offset in bytes.
+          Value *Offset = GEP->getOperand(2);
+          return Builder.CreateShl(Offset,
+                                   ConstantInt::get(Offset->getType(), 2));
+        }
+        if (GEP->getNumOperands() == 2) {
+          assert(GEP->getSourceElementType()->isIntegerTy() &&
+                 "Unhandled source element type");
+          ;
+
+          size_t BitWidth =
+              cast<IntegerType>(GEP->getSourceElementType())->getBitWidth();
+          assert(isPowerOf2_32(BitWidth) && BitWidth >= 8 &&
+                 "Expected bitwidth to be multiple of byte size");
+
+          Value *Offset = GEP->getOperand(1);
+          if (BitWidth != 8)
+            Offset = Builder.CreateShl(
+                Offset,
+                ConstantInt::get(Offset->getType(), Log2_32(BitWidth / 8)));
+
+          return Offset;
+        }
+        llvm_unreachable("Unhandled GEP pattern");
+      }();
+
+      // Insert the call to load.relative intrinsic before LOAD.
+      // GEP might not be immediately followed by a LOAD, like it can be hoisted
+      // outside the loop, or another instruction might be inserted in
+      // between.
+      Builder.SetInsertPoint(Load);
+      Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
+          &M, Intrinsic::load_relative, {Offset->getType()});
+
+      // Create a call to load.relative intrinsic that computes the target
+      // address by adding base address (lookup table address) and relative
+      // offset.
+      Value *RelLoad = Builder.CreateCall(LoadRelIntrinsic, {&GV, Offset},
+                                          "reltable.intrinsic");
+
+      // Replace load instruction with the new generated instruction sequence.
+      Load->replaceAllUsesWith(RelLoad);
+
+      // NOTE: We remove the loads later since we cannot remove them during Use
+      // iteration.
+      ToRemove.push_back(Load);
+    }
+
+    ToRemove.push_back(GEP);
+  }
+
+  for (auto *Instr : ToRemove)
+    Instr->eraseFromParent();
+
+  // Remove the original lookup table.
+  GV.replaceAllUsesWith(Replacement);
+  GV.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "Converted " << Replacement->getName());
 }
 
 // Convert lookup tables to relative lookup tables in the module.
@@ -189,9 +317,6 @@ static bool convertToRelativeLookupTables(
 
     convertToRelLookupTable(GV);
 
-    // Remove the original lookup table.
-    GV.eraseFromParent();
-
     Changed = true;
   }
 
diff --git a/llvm/test/Transforms/RelLookupTableConverter/X86/no_relative_lookup_table.ll b/llvm/test/Transforms/RelLookupTableConverter/X86/no_relative_lookup_table.ll
index 26acb04c1d3cc..aeb9d2d457008 100644
--- a/llvm/test/Transforms/RelLookupTableConverter/X86/no_relative_lookup_table.ll
+++ b/llvm/test/Transforms/RelLookupTableConverter/X86/no_relative_lookup_table.ll
@@ -10,6 +10,20 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @.str.2 = private unnamed_addr constant [4 x i8] c"two\00", align 1
 @.str.3 = private unnamed_addr constant [8 x i8] c"default\00", align 1
 
+;; These globals structurally can be made PIC-friendly, but will not be
+;; transformed because they have a use that could potentially escape the
+;; visibility of this TU. If we want to transform a global, we must be able
+;; to see all uses of it.
+; CHECK-DAG: @escapes_through_call = internal constant [2 x ptr]
+ at escapes_through_call = internal constant [2 x ptr] [ ptr @.str, ptr @.str.1 ]
+
+; CHECK-DAG: @escapes_through_ret = internal constant [2 x ptr]
+ at escapes_through_ret = internal constant [2 x ptr] [ ptr @.str, ptr @.str.1 ]
+
+;; We allow usage through GEPs, but each GEP must only be used by a load.
+; CHECK-DAG: @escapes_through_gep_call = internal constant [2 x ptr]
+ at escapes_through_gep_call = internal constant [2 x ptr] [ ptr @.str, ptr @.str.1 ]
+
 @switch.table.string_table = private unnamed_addr constant [3 x ptr]
                              [
                               ptr @.str,
@@ -50,3 +64,46 @@ switch.lookup:                                    ; preds = %entry
 return:                                           ; preds = %entry
   ret ptr @.str.3
 }
+
+
+declare void @extern_func(ptr %p)
+
+define ptr @global_escapes_through_call(i32 %x) {
+entry:
+; CHECK-LABEL: @global_escapes_through_call
+; CHECK:       entry:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x ptr], ptr @escapes_through_call, i32 0, i32 %x
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[GEP]], align 8
+; CHECK-NEXT:    call void @extern_func(ptr @escapes_through_call)
+; CHECK-NEXT:    ret ptr [[LOAD]]
+  %0 = getelementptr [2 x ptr], ptr @escapes_through_call, i32 0, i32 %x
+  %1 = load ptr, ptr %0
+  call void @extern_func(ptr @escapes_through_call)
+  ret ptr %1
+}
+
+define ptr @global_escapes_through_ret(i32 %x) {
+entry:
+; CHECK-LABEL: @global_escapes_through_ret
+; CHECK:       entry:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x ptr], ptr @escapes_through_ret, i32 0, i32 %x
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[GEP]], align 8
+; CHECK-NEXT:    ret ptr @escapes_through_ret
+  %0 = getelementptr [2 x ptr], ptr @escapes_through_ret, i32 0, i32 %x
+  %1 = load ptr, ptr %0
+  ret ptr @escapes_through_ret
+}
+
+define ptr @global_escapes_through_gep_call(i32 %x) {
+entry:
+; CHECK-LABEL: @global_escapes_through_gep_call
+; CHECK:       entry:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x ptr], ptr @escapes_through_gep_call, i32 0, i32 %x
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[GEP]], align 8
+; CHECK-NEXT:    call void @extern_func(ptr [[GEP]])
+; CHECK-NEXT:    ret ptr [[LOAD]]
+  %0 = getelementptr [2 x ptr], ptr @escapes_through_gep_call, i32 0, i32 %x
+  %1 = load ptr, ptr %0
+  call void @extern_func(ptr %0)
+  ret ptr %1
+}
diff --git a/llvm/test/Transforms/RelLookupTableConverter/X86/opaque-ptr.ll b/llvm/test/Transforms/RelLookupTableConverter/X86/opaque-ptr.ll
index b60f447a56774..f067feaa0ea85 100644
--- a/llvm/test/Transforms/RelLookupTableConverter/X86/opaque-ptr.ll
+++ b/llvm/test/Transforms/RelLookupTableConverter/X86/opaque-ptr.ll
@@ -15,7 +15,7 @@ target triple = "x86_64-unknown-linux-gnu"
 define ptr @test(i32 %cond) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:    [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND:%.*]], 2
-; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @reltable.test, i32 [[RELTABLE_SHIFT]])
+; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @table1, i32 [[RELTABLE_SHIFT]])
 ; CHECK-NEXT:    ret ptr [[RELTABLE_INTRINSIC]]
 ;
   %switch.gep = getelementptr inbounds [3 x ptr], ptr @table1, i32 0, i32 %cond
diff --git a/llvm/test/Transforms/RelLookupTableConverter/X86/relative_lookup_table.ll b/llvm/test/Transforms/RelLookupTableConverter/X86/relative_lookup_table.ll
index 9e433e9a90355..5fd40defd54ee 100644
--- a/llvm/test/Transforms/RelLookupTableConverter/X86/relative_lookup_table.ll
+++ b/llvm/test/Transforms/RelLookupTableConverter/X86/relative_lookup_table.ll
@@ -73,51 +73,63 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: @switch.table.external_linkage = private unnamed_addr constant [3 x ptr] [ptr @a1, ptr @b1, ptr @c1], align
 
 ; Lookup table check for integer pointers that have internal linkage
-; CHECK: @reltable.internal_linkage = private unnamed_addr constant [3 x i32]
+; CHECK: @switch.table.internal_linkage = private unnamed_addr constant [3 x i32]
 ; CHECK-SAME: [
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @a2 to i64), i64 ptrtoint (ptr @reltable.internal_linkage to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @b2 to i64), i64 ptrtoint (ptr @reltable.internal_linkage to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @c2 to i64), i64 ptrtoint (ptr @reltable.internal_linkage to i64)) to i32)
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @a2 to i64), i64 ptrtoint (ptr @switch.table.internal_linkage to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @b2 to i64), i64 ptrtoint (ptr @switch.table.internal_linkage to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @c2 to i64), i64 ptrtoint (ptr @switch.table.internal_linkage to i64)) to i32)
 ; CHECK-SAME: ], align 4
 
 ; Relative switch lookup table for strings
-; CHECK: @reltable.string_table = private unnamed_addr constant [3 x i32]
+; CHECK: @switch.table.string_table = private unnamed_addr constant [3 x i32]
 ; CHECK-SAME: [
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str to i64), i64 ptrtoint (ptr @reltable.string_table to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.1 to i64), i64 ptrtoint (ptr @reltable.string_table to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.2 to i64), i64 ptrtoint (ptr @reltable.string_table to i64)) to i32)
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str to i64), i64 ptrtoint (ptr @switch.table.string_table to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.1 to i64), i64 ptrtoint (ptr @switch.table.string_table to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.2 to i64), i64 ptrtoint (ptr @switch.table.string_table to i64)) to i32)
 ; CHECK-SAME: ], align 4
 
 ; Relative switch lookup table for strings with holes, where holes are filled with relative offset to default values
-; CHECK: @reltable.string_table_holes = private unnamed_addr constant [4 x i32]
+; CHECK: @switch.table.string_table_holes = private unnamed_addr constant [4 x i32]
 ; CHECK-SAME: [
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str to i64), i64 ptrtoint (ptr @reltable.string_table_holes to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.3 to i64), i64 ptrtoint (ptr @reltable.string_table_holes to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.2 to i64), i64 ptrtoint (ptr @reltable.string_table_holes to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.4 to i64), i64 ptrtoint (ptr @reltable.string_table_holes to i64)) to i32)
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str to i64), i64 ptrtoint (ptr @switch.table.string_table_holes to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.3 to i64), i64 ptrtoint (ptr @switch.table.string_table_holes to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.2 to i64), i64 ptrtoint (ptr @switch.table.string_table_holes to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.4 to i64), i64 ptrtoint (ptr @switch.table.string_table_holes to i64)) to i32)
 ; CHECK-SAME: ], align 4
 
 ; Single value check
-; CHECK: @reltable.single_value = private unnamed_addr constant [3 x i32]
+; CHECK: @switch.table.single_value = private unnamed_addr constant [3 x i32]
 ; CHECK-SAME: [
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str to i64), i64 ptrtoint (ptr @reltable.single_value to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.1 to i64), i64 ptrtoint (ptr @reltable.single_value to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.2 to i64), i64 ptrtoint (ptr @reltable.single_value to i64)) to i32)
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str to i64), i64 ptrtoint (ptr @switch.table.single_value to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.1 to i64), i64 ptrtoint (ptr @switch.table.single_value to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.2 to i64), i64 ptrtoint (ptr @switch.table.single_value to i64)) to i32)
 ; CHECK-SAME: ], align 4
 ;
 
 ; Relative lookup table for the loop hoist check test
-; CHECK: @reltable.loop_hoist = internal unnamed_addr constant [2 x i32]
+; CHECK: @table = internal constant [2 x i32]
 ; CHECK-SAME: [
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.8 to i64), i64 ptrtoint (ptr @reltable.loop_hoist to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.9 to i64), i64 ptrtoint (ptr @reltable.loop_hoist to i64)) to i32)
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.8 to i64), i64 ptrtoint (ptr @table to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.9 to i64), i64 ptrtoint (ptr @table to i64)) to i32)
 ; CHECK-SAME: ], align 4
 
 ; Relative look up table for the test where gep is not immediately followed by a load check
-; CHECK: @reltable.gep_is_not_imm_followed_by_load = internal unnamed_addr constant [2 x i32]
+; CHECK: @table2 = internal constant [2 x i32]
 ; CHECK-SAME: [
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.8 to i64), i64 ptrtoint (ptr @reltable.gep_is_not_imm_followed_by_load to i64)) to i32),
-; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.9 to i64), i64 ptrtoint (ptr @reltable.gep_is_not_imm_followed_by_load to i64)) to i32)
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.8 to i64), i64 ptrtoint (ptr @table2 to i64)) to i32),
+; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.9 to i64), i64 ptrtoint (ptr @table2 to i64)) to i32)
+; CHECK-SAME: ], align 4
+
+; CHECK:      @struct_table = internal constant {{.*}} {
+; CHECK-SAME:   i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.1 to i64), i64 ptrtoint (ptr @struct_table to i64)) to i32),
+; CHECK-SAME:   i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.2 to i64), i64 ptrtoint (ptr @struct_table to i64)) to i32),
+; CHECK-SAME:   i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.3 to i64), i64 ptrtoint (ptr @struct_table to i64)) to i32)
+; CHECK-SAME: }, align 4
+
+; CHECK:      @array_table = internal constant {{.*}} [
+; CHECK-SAME:   i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.1 to i64), i64 ptrtoint (ptr @array_table to i64)) to i32),
+; CHECK-SAME:   i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.2 to i64), i64 ptrtoint (ptr @array_table to i64)) to i32),
+; CHECK-SAME:   i32 trunc (i64 sub (i64 ptrtoint (ptr @.str.3 to i64), i64 ptrtoint (ptr @array_table to i64)) to i32)
 ; CHECK-SAME: ], align 4
 
 ; Lookup table check for integer pointers that have external linkage
@@ -154,7 +166,7 @@ define ptr @internal_linkage(i32 %cond) {
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
 ; CHECK-NEXT:    [[RELTABLE_SHIFT:%.*]] = shl i32 %cond, 2
-; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @reltable.internal_linkage, i32 [[RELTABLE_SHIFT]])
+; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @switch.table.internal_linkage, i32 [[RELTABLE_SHIFT]])
 ; CHECK-NEXT:    ret ptr [[RELTABLE_INTRINSIC]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret ptr @d2
@@ -180,7 +192,7 @@ define ptr @string_table(i32 %cond) {
   ; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
   ; CHECK:       switch.lookup:
   ; CHECK-NEXT:    [[RELTABLE_SHIFT:%.*]] = shl i32 %cond, 2
-  ; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @reltable.string_table, i32 [[RELTABLE_SHIFT]])
+  ; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @switch.table.string_table, i32 [[RELTABLE_SHIFT]])
   ; CHECK-NEXT:    ret ptr [[RELTABLE_INTRINSIC]]
   ; CHECK:       return:
   ; CHECK-NEXT:    ret ptr @.str.3
@@ -206,7 +218,7 @@ define ptr @string_table_holes(i32 %cond) {
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
 ; CHECK-NEXT:    [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2
-; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @reltable.string_table_holes, i32 [[RELTABLE_SHIFT]])
+; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @switch.table.string_table_holes, i32 [[RELTABLE_SHIFT]])
 ; CHECK-NEXT:    ret ptr [[RELTABLE_INTRINSIC]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret ptr @.str.3
@@ -235,7 +247,7 @@ define void @single_value(i32 %cond)  {
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
 ; CHECK-NEXT:    [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2
-; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @reltable.single_value, i32 [[RELTABLE_SHIFT]])
+; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @switch.table.single_value, i32 [[RELTABLE_SHIFT]])
 ; CHECK:       sw.epilog:
 ; CHECK-NEXT:   [[STR1:%.*]] = phi ptr [ @.str.5, %entry ], [ @.str.7, %switch.lookup ]
 ; CHECK-NEXT:   [[STR2:%.*]] = phi ptr [ @.str.6, %entry ], [ [[RELTABLE_INTRINSIC]], [[SWITCH_LOOKUP]] ]
@@ -265,7 +277,7 @@ define ptr @user_defined_lookup_table(i32 %cond)  {
 ; CHECK:       cond.false:
 ; CHECK-NEXT:    [[IDX_PROM:%.*]] = sext i32 [[COND]] to i64
 ; CHECK-NEXT:    [[RELTABLE_SHIFT:%.*]] = shl i64 [[IDX_PROM]], 2
-; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @reltable.user_defined_lookup_table, i64 [[RELTABLE_SHIFT]])
+; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @user_defined_lookup_table.table, i64 [[RELTABLE_SHIFT]])
 ; CHECK-NEXT:    br label %cond.end
 ; CHECK:       cond.end:
 ; CHECK-NEXT:    [[COND1:%.*]] = phi ptr [ [[RELTABLE_INTRINSIC]], %cond.false ], [ @.str.3, %entry ]
@@ -296,7 +308,7 @@ define ptr @loop_hoist(i32 %x) {
 ; CHECK-NEXT:    [[RELTABLE_SHIFT:%.*]] = shl i32 [[X:%.*]], 2
 ; CHECK-NEXT:    br i1 [[TMP0]], label %if.done, label %if.false
 ; CHECK:       if.false:
-; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @reltable.loop_hoist, i32 [[RELTABLE_SHIFT]])
+; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @table, i32 [[RELTABLE_SHIFT]])
 ; CHECK-NEXT:    br label %if.done
 ; CHECK:       if.done:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi ptr [ @.str.10, %entry ], [ [[RELTABLE_INTRINSIC]], %if.false ]
@@ -327,7 +339,7 @@ define ptr @gep_is_not_imm_followed_by_load(i32 %x) {
 ; CHECK:       entry:
 ; CHECK-NEXT:    [[RELTABLE_SHIFT:%.*]] = shl i32 [[X:%.*]], 2
 ; CHECK-NEXT:    call void @may_not_return()
-; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @reltable.gep_is_not_imm_followed_by_load, i32 [[RELTABLE_SHIFT]])
+; CHECK-NEXT:    [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i32(ptr @table2, i32 [[RELTABLE_SHIFT]])
 ; CHECK-NEXT:    ret ptr [[RELTABLE_INTRINSIC]]
 ;
 entry:
@@ -337,6 +349,90 @@ entry:
   ret ptr %1
 }
 
+ at struct_table = internal constant {ptr, ptr, ptr} {
+  ptr @.str.1,
+  ptr @.str.2,
+  ptr @.str.3
+}
+
+define ptr @convert_struct() {
+;; We should correctly access the struct: its third member lives 8 bytes past
+;; the start of the table, since each converted entry is a 4-byte i32 offset.
+; CHECK-LABEL: @convert_struct
+; CHECK:       entry:
+; CHECK-NEXT:    [[PTR:%.*]] = call ptr @llvm.load.relative.i32(ptr @struct_table, i32 8)
+; CHECK-NEXT:    ret ptr [[PTR]]
+entry:
+  %0 = getelementptr { ptr, ptr, ptr }, ptr @struct_table, i32 0, i32 2
+  %1 = load ptr, ptr %0
+  ret ptr %1
+}
+
+declare void @func(ptr %p)
+
+define void @more_than_one_use() {
+; CHECK-LABEL: @more_than_one_use
+; CHECK:       entry:
+; CHECK-NEXT:    [[PTR:%.*]] = call ptr @llvm.load.relative.i32(ptr @struct_table, i32 0)
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @llvm.load.relative.i32(ptr @struct_table, i32 4)
+; CHECK-NEXT:    call void @func(ptr [[PTR]])
+; CHECK-NEXT:    call void @func(ptr [[PTR2]])
+entry:
+  %0 = getelementptr { ptr, ptr, ptr }, ptr @struct_table, i32 0, i32 0
+  %1 = load ptr, ptr %0
+  %2 = getelementptr { ptr, ptr, ptr }, ptr @struct_table, i32 0, i32 1
+  %3 = load ptr, ptr %2
+  call void @func(ptr %1)
+  call void @func(ptr %3)
+  ret void
+}
+
+ at array_table = internal constant [3 x ptr] [
+  ptr @.str.1,
+  ptr @.str.2,
+  ptr @.str.3
+]
+
+define ptr @i8_gep_with_two_operands(i32 %x) {
+entry:
+;; Check that we can convert GEPs where the global is addressed as a pointer to i8.
+; CHECK-LABEL: @i8_gep_with_two_operands
+; CHECK:       entry:
+; CHECK-NEXT:    [[PTR:%.*]] = call ptr @llvm.load.relative.i32(ptr @array_table, i32 %x)
+; CHECK-NEXT:    ret ptr [[PTR]]
+  %0 = getelementptr i8, ptr @array_table, i32 %x
+  %1 = load ptr, ptr %0
+  ret ptr %1
+}
+
+define void @i32_gep_with_two_operands(i32 %x) {
+entry:
+;; Handle 2-operand GEPs with different integral sizes. If we treat the global
+;; as a pointer to a different integral size, we should adjust the byte-offset
+;; accordingly.
+; CHECK-LABEL: @i32_gep_with_two_operands
+; CHECK:       entry:
+; CHECK-NEXT:    [[OFFSET:%.*]] = shl i32 %x, 1
+; CHECK-NEXT:    [[PTR:%.*]] = call ptr @llvm.load.relative.i32(ptr @array_table, i32 [[OFFSET]])
+; CHECK-NEXT:    [[OFFSET2:%.*]] = shl i32 %x, 2
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @llvm.load.relative.i32(ptr @array_table, i32 [[OFFSET2]])
+; CHECK-NEXT:    [[OFFSET3:%.*]] = shl i32 %x, 3
+; CHECK-NEXT:    [[PTR3:%.*]] = call ptr @llvm.load.relative.i32(ptr @array_table, i32 [[OFFSET3]])
+; CHECK-NEXT:    call void @func(ptr [[PTR]])
+; CHECK-NEXT:    call void @func(ptr [[PTR2]])
+; CHECK-NEXT:    call void @func(ptr [[PTR3]])
+; CHECK-NEXT:    ret void
+  %0 = getelementptr i16, ptr @array_table, i32 %x
+  %1 = load ptr, ptr %0
+  %2 = getelementptr i32, ptr @array_table, i32 %x
+  %3 = load ptr, ptr %2
+  %4 = getelementptr i64, ptr @array_table, i32 %x
+  %5 = load ptr, ptr %4
+  call void @func(ptr %1)
+  call void @func(ptr %3)
+  call void @func(ptr %5)
+  ret void
+}
+
 !llvm.module.flags = !{!0, !1}
 !0 = !{i32 7, !"PIC Level", i32 2}
 !1 = !{i32 1, !"Code Model", i32 1}



More information about the llvm-commits mailing list