[llvm] [NFC] Add DIExpression::extractLeadingOffset (PR #97719)

Orlando Cazalet-Hyams via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 8 02:08:56 PDT 2024


https://github.com/OCHyams updated https://github.com/llvm/llvm-project/pull/97719

>From c24695998af8079573a547a43d1442904c774fa4 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Thu, 4 Jul 2024 02:31:40 +0100
Subject: [PATCH 1/3] [NFC] Add DIExpression::extractLeadingOffset

Patch [2/x] to fix structured bindings debug info in SROA.

It extracts a constant offset from the DIExpression if there is one and fills
RemainingOps with the ops that come after it.

This function will be used in a subsequent patch.
---
 llvm/include/llvm/IR/DebugInfoMetadata.h | 10 +++
 llvm/lib/IR/DebugInfoMetadata.cpp        | 39 ++++++++++++
 llvm/unittests/IR/MetadataTest.cpp       | 78 ++++++++++++++++++++++++
 3 files changed, 127 insertions(+)

diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index d953ce46efb30..a316c7bde64b0 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -2982,6 +2982,16 @@ class DIExpression : public MDNode {
   /// return true with an offset of zero.
   bool extractIfOffset(int64_t &Offset) const;
 
+  /// Assuming that the expression operates on an address, extract a constant
+  /// offset and the succsessive ops. Return false if the expression contains
+  /// any incompatible ops (including non-zero DW_OP_LLVM_args - only a single
+  /// address operand to the expression is permitted).
+  ///
+  /// We don't try very hard to interpret the expression because we assume that
+  /// foldConstantMath has canonicalized the expression.
+  bool extractLeadingOffset(int64_t &Offset,
+                            SmallVectorImpl<uint64_t> &RemainingOps) const;
+
   /// Returns true iff this DIExpression contains at least one instance of
   /// `DW_OP_LLVM_arg, n` for all n in [0, N).
   bool hasAllLocationOps(unsigned N) const;
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 161a30dfb3828..f800f324d05ac 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1760,6 +1760,45 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const {
   return false;
 }
 
+bool DIExpression::extractLeadingOffset(
+    int64_t &Offset, SmallVectorImpl<uint64_t> &RemainingOps) const {
+  Offset = 0;
+  RemainingOps.clear();
+
+  auto SingleLocEltsOpt = getSingleLocationExpressionElements();
+  if (!SingleLocEltsOpt)
+    return false;
+
+  auto ExprOpEnd = expr_op_iterator(SingleLocEltsOpt->end());
+  auto ExprOpIt = expr_op_iterator(SingleLocEltsOpt->begin());
+  while (ExprOpIt != ExprOpEnd) {
+    uint64_t Op = ExprOpIt->getOp();
+    if (Op == dwarf::DW_OP_deref || Op == dwarf::DW_OP_deref_size ||
+        Op == dwarf::DW_OP_deref_type || Op == dwarf::DW_OP_LLVM_fragment ||
+        Op == dwarf::DW_OP_LLVM_extract_bits_zext ||
+        Op == dwarf::DW_OP_LLVM_extract_bits_sext) {
+      break;
+    } else if (Op == dwarf::DW_OP_plus_uconst) {
+      Offset += ExprOpIt->getArg(0);
+    } else if (Op == dwarf::DW_OP_constu) {
+      uint64_t Value = ExprOpIt->getArg(0);
+      ++ExprOpIt;
+      if (ExprOpIt->getOp() == dwarf::DW_OP_plus)
+        Offset += Value;
+      else if (ExprOpIt->getOp() == dwarf::DW_OP_minus)
+        Offset -= Value;
+      else
+        return false;
+    } else {
+      // Not a const plus/minus operation or deref.
+      return false;
+    }
+    ++ExprOpIt;
+  }
+  RemainingOps.append(ExprOpIt.getBase(), ExprOpEnd.getBase());
+  return true;
+}
+
 bool DIExpression::hasAllLocationOps(unsigned N) const {
   SmallDenseSet<uint64_t, 4> SeenOps;
   for (auto ExprOp : expr_ops())
diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp
index 3f766a414f08f..29c9bd0a4cd9a 100644
--- a/llvm/unittests/IR/MetadataTest.cpp
+++ b/llvm/unittests/IR/MetadataTest.cpp
@@ -3870,6 +3870,84 @@ TEST_F(DIExpressionTest, createFragmentExpression) {
 #undef EXPECT_INVALID_FRAGMENT
 }
 
+TEST_F(DIExpressionTest, extractLeadingOffset) {
+  int64_t Offset;
+  SmallVector<uint64_t> Remaining;
+  using namespace dwarf;
+#define OPS(...) SmallVector<uint64_t>(ArrayRef<uint64_t>{__VA_ARGS__})
+#define EXTRACT_FROM(...)                                                      \
+  DIExpression::get(Context, {__VA_ARGS__})                                    \
+      ->extractLeadingOffset(Offset, Remaining)
+  // Test the number of expression inputs
+  // ------------------------------------
+  //
+  // Single location expressions are permitted.
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 2));
+  EXPECT_EQ(Offset, 2);
+  EXPECT_EQ(Remaining.size(), 0);
+  // This is also a single-location.
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 2));
+  EXPECT_EQ(Offset, 2);
+  EXPECT_EQ(Remaining.size(), 0);
+  // Variadic locations are not permitted. A non-zero arg is assumed to
+  // indicate multiple inputs.
+  EXPECT_FALSE(EXTRACT_FROM(DW_OP_LLVM_arg, 1));
+  EXPECT_FALSE(EXTRACT_FROM(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus));
+
+  // Test offsets expressions
+  // ------------------------
+  EXPECT_TRUE(EXTRACT_FROM());
+  EXPECT_EQ(Offset, 0);
+  EXPECT_EQ(Remaining.size(), 0u);
+
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_constu, 4, DW_OP_plus));
+  EXPECT_EQ(Offset, 4);
+  EXPECT_EQ(Remaining.size(), 0u);
+
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_constu, 2, DW_OP_minus));
+  EXPECT_EQ(Offset, -2);
+  EXPECT_EQ(Remaining.size(), 0u);
+
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 8));
+  EXPECT_EQ(Offset, 8);
+  EXPECT_EQ(Remaining.size(), 0u);
+
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 4, DW_OP_constu, 2, DW_OP_minus));
+  EXPECT_EQ(Offset, 2);
+  EXPECT_EQ(Remaining.size(), 0u);
+
+  // Not all operations are permitted for simplicity. Can be added
+  // if needed in future.
+  EXPECT_FALSE(EXTRACT_FROM(DW_OP_constu, 2, DW_OP_mul));
+
+  // Test "remaining ops"
+  // --------------------
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 4, DW_OP_constu, 8, DW_OP_minus,
+                           DW_OP_LLVM_fragment, 0, 32));
+  EXPECT_EQ(Remaining, OPS(DW_OP_LLVM_fragment, 0, 32));
+  EXPECT_EQ(Offset, -4);
+
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_deref));
+  EXPECT_EQ(Remaining, OPS(DW_OP_deref));
+  EXPECT_EQ(Offset, 0);
+
+  // Check things after the non-offset ops are added too.
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 2, DW_OP_deref_size, 4,
+                           DW_OP_stack_value));
+  EXPECT_EQ(Remaining, OPS(DW_OP_deref_size, 4, DW_OP_stack_value));
+  EXPECT_EQ(Offset, 2);
+
+  // DW_OP_deref_type isn't supported in LLVM so this currently fails.
+  EXPECT_FALSE(EXTRACT_FROM(DW_OP_deref_type, 0));
+
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_LLVM_extract_bits_zext, 0, 8));
+  EXPECT_EQ(Remaining, OPS(DW_OP_LLVM_extract_bits_zext, 0, 8));
+
+  EXPECT_TRUE(EXTRACT_FROM(DW_OP_LLVM_extract_bits_sext, 4, 4));
+  EXPECT_EQ(Remaining, OPS(DW_OP_LLVM_extract_bits_sext, 4, 4));
+#undef EXTRACT_FROM
+}
+
 TEST_F(DIExpressionTest, convertToUndefExpression) {
 #define EXPECT_UNDEF_OPS_EQUAL(TestExpr, Expected)                             \
   do {                                                                         \

>From c87668df8b7cc8d358ae15727f9c539bdcd3a17b Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlandoch.och at gmail.com>
Date: Mon, 8 Jul 2024 09:14:54 +0100
Subject: [PATCH 2/3] Update llvm/include/llvm/IR/DebugInfoMetadata.h

Co-authored-by: Stephen Tozer <Melamoto at gmail.com>
---
 llvm/include/llvm/IR/DebugInfoMetadata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index a316c7bde64b0..0660504bbfc6e 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -2983,7 +2983,7 @@ class DIExpression : public MDNode {
   bool extractIfOffset(int64_t &Offset) const;
 
   /// Assuming that the expression operates on an address, extract a constant
-  /// offset and the succsessive ops. Return false if the expression contains
+  /// offset and the successive ops. Return false if the expression contains
   /// any incompatible ops (including non-zero DW_OP_LLVM_args - only a single
   /// address operand to the expression is permitted).
   ///

>From 0bb27ac7576214bd0636f6830e1b520d24559763 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Sun, 7 Jul 2024 22:40:06 +0100
Subject: [PATCH 3/3] Rename Offset -> OffsetInBytes

---
 llvm/include/llvm/IR/DebugInfoMetadata.h |  2 +-
 llvm/lib/IR/DebugInfoMetadata.cpp        | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index 0660504bbfc6e..a6220035d25c2 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -2989,7 +2989,7 @@ class DIExpression : public MDNode {
   ///
   /// We don't try very hard to interpret the expression because we assume that
   /// foldConstantMath has canonicalized the expression.
-  bool extractLeadingOffset(int64_t &Offset,
+  bool extractLeadingOffset(int64_t &OffsetInBytes,
                             SmallVectorImpl<uint64_t> &RemainingOps) const;
 
   /// Returns true iff this DIExpression contains at least one instance of
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index f800f324d05ac..b4f7a9df41281 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1761,8 +1761,8 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const {
 }
 
 bool DIExpression::extractLeadingOffset(
-    int64_t &Offset, SmallVectorImpl<uint64_t> &RemainingOps) const {
-  Offset = 0;
+    int64_t &OffsetInBytes, SmallVectorImpl<uint64_t> &RemainingOps) const {
+  OffsetInBytes = 0;
   RemainingOps.clear();
 
   auto SingleLocEltsOpt = getSingleLocationExpressionElements();
@@ -1779,14 +1779,14 @@ bool DIExpression::extractLeadingOffset(
         Op == dwarf::DW_OP_LLVM_extract_bits_sext) {
       break;
     } else if (Op == dwarf::DW_OP_plus_uconst) {
-      Offset += ExprOpIt->getArg(0);
+      OffsetInBytes += ExprOpIt->getArg(0);
     } else if (Op == dwarf::DW_OP_constu) {
       uint64_t Value = ExprOpIt->getArg(0);
       ++ExprOpIt;
       if (ExprOpIt->getOp() == dwarf::DW_OP_plus)
-        Offset += Value;
+        OffsetInBytes += Value;
       else if (ExprOpIt->getOp() == dwarf::DW_OP_minus)
-        Offset -= Value;
+        OffsetInBytes -= Value;
       else
         return false;
     } else {



More information about the llvm-commits mailing list