[llvm] [IR] Disallow recursive types (PR #114799)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 4 06:16:10 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lto

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>

StructType::setBody is the only mechanism that can potentially create
recursion in the type system. Add a runtime check that it is not
actually used to create recursion.

If the check fails, report an error from LLParser, BitcodeReader and
IRLinker. In all other cases assert that the check succeeds.

In future StructType::setBody will be removed in favor of specifying the
body when the type is created, so any performance hit from this runtime
check will be temporary.


---
Full diff: https://github.com/llvm/llvm-project/pull/114799.diff


16 Files Affected:

- (modified) llvm/docs/LangRef.rst (+1-1) 
- (modified) llvm/docs/ReleaseNotes.md (+2) 
- (modified) llvm/include/llvm/IR/DerivedTypes.h (+11-1) 
- (modified) llvm/lib/AsmParser/LLParser.cpp (+3-1) 
- (modified) llvm/lib/Bitcode/Reader/BitcodeReader.cpp (+2-1) 
- (modified) llvm/lib/IR/Type.cpp (+23-5) 
- (modified) llvm/lib/Linker/IRMover.cpp (+12-10) 
- (added) llvm/test/Assembler/mutually-recursive-types.ll (+7) 
- (modified) llvm/test/Assembler/unsized-recursive-type.ll (+1-5) 
- (modified) llvm/test/Linker/pr22807.ll (+2-5) 
- (removed) llvm/test/Verifier/recursive-struct-param.ll (-15) 
- (removed) llvm/test/Verifier/recursive-type-1.ll (-12) 
- (removed) llvm/test/Verifier/recursive-type-2.ll (-14) 
- (removed) llvm/test/Verifier/recursive-type-load.ll (-12) 
- (removed) llvm/test/Verifier/recursive-type-store.ll (-12) 
- (modified) llvm/unittests/Analysis/TargetLibraryInfoTest.cpp (+1-1) 


``````````diff
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index bbd9667756a498..765cc3204dd20f 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -4386,7 +4386,7 @@ is defined inline with other types (e.g. ``[2 x {i32, i32}]``) whereas
 identified types are always defined at the top level with a name.
 Literal types are uniqued by their contents and can never be recursive
 or opaque since there is no way to write one. Identified types can be
-recursive, can be opaqued, and are never uniqued.
+opaqued and are never uniqued. Identified types must not be recursive.
 
 :Syntax:
 
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index d5c650e74eeb28..ee2f1f5b9c49a5 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -56,6 +56,8 @@ Makes programs 10x faster by doing Special New Thing.
 Changes to the LLVM IR
 ----------------------
 
+* Types are no longer allowed to be recursive.
+
 * The `x86_mmx` IR type has been removed. It will be translated to
   the standard vector type `<1 x i64>` in bitcode upgrade.
 * Renamed `llvm.experimental.stepvector` intrinsic to `llvm.stepvector`.
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index 820b5c0707df6c..eb0e5abdb7339c 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -33,6 +33,7 @@ class Value;
 class APInt;
 class LLVMContext;
 template <typename T> class Expected;
+class Error;
 
 /// Class to represent integer types. Note that this class is also used to
 /// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and
@@ -317,9 +318,18 @@ class StructType : public Type {
   /// suffix if there is a collision. Do not call this on an literal type.
   void setName(StringRef Name);
 
-  /// Specify a body for an opaque identified type.
+  /// Specify a body for an opaque identified type, which must not make the type
+  /// recursive.
   void setBody(ArrayRef<Type*> Elements, bool isPacked = false);
 
+  /// Specify a body for an opaque identified type or return an error if it
+  /// would make the type recursive.
+  Error setBodyOrError(ArrayRef<Type *> Elements, bool isPacked = false);
+
+  /// Return an error if the body for an opaque identified type would make it
+  /// recursive.
+  Error checkBody(ArrayRef<Type *> Elements);
+
   template <typename... Tys>
   std::enable_if_t<are_base_of<Type, Tys...>::value, void>
   setBody(Type *elt1, Tys *... elts) {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 8ddb2efb0e26c2..6ad8d21c054d6f 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -3391,7 +3391,9 @@ bool LLParser::parseStructDefinition(SMLoc TypeLoc, StringRef Name,
       (isPacked && parseToken(lltok::greater, "expected '>' in packed struct")))
     return true;
 
-  STy->setBody(Body, isPacked);
+  if (auto E = STy->setBodyOrError(Body, isPacked))
+    return tokError(toString(std::move(E)));
+
   ResultTy = STy;
   return false;
 }
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 446c98c8cecd88..3e82aa7188bd67 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2659,7 +2659,8 @@ Error BitcodeReader::parseTypeTableBody() {
       }
       if (EltTys.size() != Record.size()-1)
         return error("Invalid named struct record");
-      Res->setBody(EltTys, Record[0]);
+      if (auto E = Res->setBodyOrError(EltTys, Record[0]))
+        return E;
       ContainedIDs.append(Record.begin() + 1, Record.end());
       ResultTy = Res;
       break;
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index e311cde415174a..88ede0d35fa3ee 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -13,6 +13,7 @@
 #include "llvm/IR/Type.h"
 #include "LLVMContextImpl.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -436,20 +437,37 @@ bool StructType::containsHomogeneousTypes() const {
 }
 
 void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
+  cantFail(setBodyOrError(Elements, isPacked));
+}
+
+Error StructType::setBodyOrError(ArrayRef<Type *> Elements, bool isPacked) {
   assert(isOpaque() && "Struct body already set!");
 
+  if (auto E = checkBody(Elements))
+    return E;
+
   setSubclassData(getSubclassData() | SCDB_HasBody);
   if (isPacked)
     setSubclassData(getSubclassData() | SCDB_Packed);
 
   NumContainedTys = Elements.size();
+  ContainedTys = Elements.empty()
+                     ? nullptr
+                     : Elements.copy(getContext().pImpl->Alloc).data();
 
-  if (Elements.empty()) {
-    ContainedTys = nullptr;
-    return;
-  }
+  return Error::success();
+}
 
-  ContainedTys = Elements.copy(getContext().pImpl->Alloc).data();
+Error StructType::checkBody(ArrayRef<Type *> Elements) {
+  SmallSetVector<Type *, 4> Worklist(Elements.begin(), Elements.end());
+  for (unsigned I = 0; I < Worklist.size(); ++I) {
+    Type *Ty = Worklist[I];
+    if (Ty == this)
+      return createStringError(Twine("identified structure type '") +
+                               getName() + "' is recursive");
+    Worklist.insert(Ty->subtype_begin(), Ty->subtype_end());
+  }
+  return Error::success();
 }
 
 void StructType::setName(StringRef Name) {
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 5067fbff2e277b..0d54c534590ca9 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -34,6 +34,12 @@
 #include <utility>
 using namespace llvm;
 
+/// Most of the errors produced by this module are inconvertible StringErrors.
+/// This convenience function lets us return one of those more easily.
+static Error stringErr(const Twine &T) {
+  return make_error<StringError>(T, inconvertibleErrorCode());
+}
+
 //===----------------------------------------------------------------------===//
 // TypeMap implementation.
 //===----------------------------------------------------------------------===//
@@ -69,7 +75,7 @@ class TypeMapTy : public ValueMapTypeRemapper {
 
   /// Produce a body for an opaque type in the dest module from a type
   /// definition in the source module.
-  void linkDefinedTypeBodies();
+  Error linkDefinedTypeBodies();
 
   /// Return the mapped type to use for the specified input type from the
   /// source module.
@@ -207,7 +213,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
   return true;
 }
 
-void TypeMapTy::linkDefinedTypeBodies() {
+Error TypeMapTy::linkDefinedTypeBodies() {
   SmallVector<Type *, 16> Elements;
   for (StructType *SrcSTy : SrcDefinitionsToResolve) {
     StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
@@ -218,11 +224,13 @@ void TypeMapTy::linkDefinedTypeBodies() {
     for (unsigned I = 0, E = Elements.size(); I != E; ++I)
       Elements[I] = get(SrcSTy->getElementType(I));
 
-    DstSTy->setBody(Elements, SrcSTy->isPacked());
+    if (auto E = DstSTy->setBodyOrError(Elements, SrcSTy->isPacked()))
+      return E;
     DstStructTypesSet.switchToNonOpaque(DstSTy);
   }
   SrcDefinitionsToResolve.clear();
   DstResolvedOpaqueTypes.clear();
+  return Error::success();
 }
 
 void TypeMapTy::finishType(StructType *DTy, StructType *STy,
@@ -439,12 +447,6 @@ class IRLinker {
       FoundError = std::move(E);
   }
 
-  /// Most of the errors produced by this module are inconvertible StringErrors.
-  /// This convenience function lets us return one of those more easily.
-  Error stringErr(const Twine &T) {
-    return make_error<StringError>(T, inconvertibleErrorCode());
-  }
-
   /// Entry point for mapping values and alternate context for mapping aliases.
   ValueMapper Mapper;
   unsigned IndirectSymbolMCID;
@@ -875,7 +877,7 @@ void IRLinker::computeTypeMapping() {
 
   // Now that we have discovered all of the type equivalences, get a body for
   // any 'opaque' types in the dest module that are now resolved.
-  TypeMap.linkDefinedTypeBodies();
+  setError(TypeMap.linkDefinedTypeBodies());
 }
 
 static void getArrayElements(const Constant *C,
diff --git a/llvm/test/Assembler/mutually-recursive-types.ll b/llvm/test/Assembler/mutually-recursive-types.ll
new file mode 100644
index 00000000000000..e030d8ec50654a
--- /dev/null
+++ b/llvm/test/Assembler/mutually-recursive-types.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; CHECK: error: identified structure type 'rt3' is recursive
+
+%rt1 = type { i32, { i8, %rt2, i8 }, i32 }
+%rt2 = type { i64, { i6, %rt3 } }
+%rt3 = type { %rt1 }
diff --git a/llvm/test/Assembler/unsized-recursive-type.ll b/llvm/test/Assembler/unsized-recursive-type.ll
index e849e912ec819e..76c8d010f13cae 100644
--- a/llvm/test/Assembler/unsized-recursive-type.ll
+++ b/llvm/test/Assembler/unsized-recursive-type.ll
@@ -1,9 +1,5 @@
 ; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
-; CHECK: base element of getelementptr must be sized
+; CHECK: error: identified structure type 'myTy' is recursive
 
 %myTy = type { %myTy }
-define void @foo(ptr %p){
-  getelementptr %myTy, ptr %p, i64 1
-  ret void
-}
diff --git a/llvm/test/Linker/pr22807.ll b/llvm/test/Linker/pr22807.ll
index 1db1eabd0555d6..a1fe38480c6ee3 100644
--- a/llvm/test/Linker/pr22807.ll
+++ b/llvm/test/Linker/pr22807.ll
@@ -1,9 +1,6 @@
-; RUN: llvm-link -S -o - %p/pr22807.ll %p/Inputs/pr22807-1.ll %p/Inputs/pr22807-2.ll | FileCheck %s
+; RUN: not llvm-link -S -o - %p/pr22807.ll %p/Inputs/pr22807-1.ll %p/Inputs/pr22807-2.ll 2>&1 | FileCheck %s
 
-; CHECK-NOT: type
-; CHECK: %struct.B = type { %struct.A }
-; CHECK-NEXT: %struct.A = type { %struct.B }
-; CHECK-NOT: type
+; CHECK: error: identified structure type 'struct.A' is recursive
 
 %struct.B = type { %struct.A }
 %struct.A = type opaque
diff --git a/llvm/test/Verifier/recursive-struct-param.ll b/llvm/test/Verifier/recursive-struct-param.ll
deleted file mode 100644
index 19497f6a802e77..00000000000000
--- a/llvm/test/Verifier/recursive-struct-param.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt -passes=verify < %s
-
-%struct.__sFILE = type { %struct.__sFILE }
-
- at .str = private unnamed_addr constant [13 x i8] c"Hello world\0A\00", align 1
-
-; Function Attrs: nounwind ssp
-define void @test(ptr %stream, ptr %str) {
-  %fputs = call i32 @fputs(ptr %str, ptr %stream)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare i32 @fputs(ptr nocapture, ptr nocapture)
-
diff --git a/llvm/test/Verifier/recursive-type-1.ll b/llvm/test/Verifier/recursive-type-1.ll
deleted file mode 100644
index 4a399575956231..00000000000000
--- a/llvm/test/Verifier/recursive-type-1.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-
-%rt2 = type { i32, { i8, %rt2, i8 }, i32 }
-
-define i32 @main() nounwind {
-entry:
-  ; Check that recursive types trigger an error instead of segfaulting, when
-  ; the recursion isn't through a pointer to the type.
-  ; CHECK: Cannot allocate unsized type
-  %0 = alloca %rt2
-  ret i32 0
-}
diff --git a/llvm/test/Verifier/recursive-type-2.ll b/llvm/test/Verifier/recursive-type-2.ll
deleted file mode 100644
index 5f2f66fa1b11fd..00000000000000
--- a/llvm/test/Verifier/recursive-type-2.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-
-%rt1 = type { i32, { i8, %rt2, i8 }, i32 }
-%rt2 = type { i64, { i6, %rt3 } }
-%rt3 = type { %rt1 }
-
-define i32 @main() nounwind {
-entry:
-  ; Check that mutually recursive types trigger an error instead of segfaulting,
-  ; when the recursion isn't through a pointer to the type.
-  ; CHECK: Cannot allocate unsized type
-  %0 = alloca %rt2
-  ret i32 0
-}
diff --git a/llvm/test/Verifier/recursive-type-load.ll b/llvm/test/Verifier/recursive-type-load.ll
deleted file mode 100644
index 62a094deabeb15..00000000000000
--- a/llvm/test/Verifier/recursive-type-load.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-
-%rt2 = type { i32, { i8, %rt2, i8 }, i32 }
-
-define i32 @f(ptr %p) nounwind {
-entry:
-  ; Check that recursive types trigger an error instead of segfaulting, when
-  ; the recursion isn't through a pointer to the type.
-  ; CHECK: loading unsized types is not allowed
-  %0 = load %rt2, ptr %p
-  ret i32 %0
-}
diff --git a/llvm/test/Verifier/recursive-type-store.ll b/llvm/test/Verifier/recursive-type-store.ll
deleted file mode 100644
index ed815f8e1782c5..00000000000000
--- a/llvm/test/Verifier/recursive-type-store.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-
-%rt2 = type { i32, { i8, %rt2, i8 }, i32 }
-
-define void @f(%rt2 %r, ptr %p) nounwind {
-entry:
-  ; Check that recursive types trigger an error instead of segfaulting, when
-  ; the recursion isn't through a pointer to the type.
-  ; CHECK: storing unsized types is not allowed
-  store %rt2 %r, ptr %p
-  ret void
-}
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index 982d00c5d33593..c5e1d45e8b2dc0 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -59,7 +59,7 @@ class TargetLibraryInfoTest : public testing::Test {
 
 // Check that we don't accept egregiously incorrect prototypes.
 TEST_F(TargetLibraryInfoTest, InvalidProto) {
-  parseAssembly("%foo = type { %foo }\n");
+  parseAssembly("%foo = type opaque\n");
 
   auto *StructTy = StructType::getTypeByName(Context, "foo");
   auto *InvalidFTy = FunctionType::get(StructTy, /*isVarArg=*/false);

``````````

</details>


https://github.com/llvm/llvm-project/pull/114799


More information about the llvm-commits mailing list