[llvm] [DebugInfo] Handle DW_OP_LLVM_extract_bits in SROA (PR #94638)

John Brawn via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 6 09:25:23 PDT 2024


https://github.com/john-brawn-arm created https://github.com/llvm/llvm-project/pull/94638

Changes to make SROA handle DW_OP_LLVM_extract_bits

TODO: Better commit message
TODO: Handling of sign mismatch in getActiveBits may not be right
TODO: Maybe I can do something about the FIXMEs
TODO: Adjusting getFragmentSizeInBits is probably wrong; instead, use getActiveBits in valueCoversEntireFragment in Transforms/Utils/Local.cpp

>From 7b0ac402bcebd905fd669bb6de2e39209fadda09 Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Wed, 29 May 2024 10:38:28 +0100
Subject: [PATCH 1/5] [DebugInfo] Add DW_OP_LLVM_extract_bits

This operation extracts a number of bits at a given offset and sign or
zero extends them, which is done by emitting it as a left shift
followed by a right shift.

This is being added for use in clang for C++ structured bindings of
bitfields whose offset or size isn't a byte multiple. A new
operation is being added, instead of shifts being used directly, as it
makes correctly handling it in optimisations (which will be done in a
later patch) much easier.
---
 llvm/docs/LangRef.rst                         |  7 ++
 llvm/include/llvm/BinaryFormat/Dwarf.h        |  1 +
 llvm/lib/BinaryFormat/Dwarf.cpp               |  3 +
 .../CodeGen/AsmPrinter/DwarfExpression.cpp    | 32 ++++++
 llvm/lib/IR/AsmWriter.cpp                     |  4 +
 llvm/lib/IR/DebugInfoMetadata.cpp             |  3 +
 .../DebugInfo/X86/DW_OP_LLVM_extract_bits.ll  | 99 +++++++++++++++++++
 7 files changed, 149 insertions(+)
 create mode 100644 llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index c58f7f7140e47..7b4e91d09f342 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -6312,6 +6312,13 @@ The current supported opcode vocabulary is limited:
   (``16`` and ``DW_ATE_signed`` here, respectively) to which the top of the
   expression stack is to be converted. Maps into a ``DW_OP_convert`` operation
   that references a base type constructed from the supplied values.
+- ``DW_OP_LLVM_extract_bits, 16, 8, DW_ATE_signed`` specifies the offset, size,
+  and encoding (``16``, ``8``, and ``DW_ATE_signed`` here, respectively) of bits
+  that are to be extracted from the value at the top of the expression stack.
+  If the top of the expression stack is a memory location then these bits are
+  extracted from the value pointed to by that memory location. Maps into a
+  ``DW_OP_shl`` followed by ``DW_OP_shr`` or ``DW_OP_shra`` (depending on
+  encoding).
 - ``DW_OP_LLVM_tag_offset, tag_offset`` specifies that a memory tag should be
   optionally applied to the pointer. The memory tag is derived from the
   given tag offset in an implementation-defined manner.
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index 74c4d6ff3a716..7ae265484be58 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -144,6 +144,7 @@ enum LocationAtom {
   DW_OP_LLVM_entry_value = 0x1003,      ///< Only used in LLVM metadata.
   DW_OP_LLVM_implicit_pointer = 0x1004, ///< Only used in LLVM metadata.
   DW_OP_LLVM_arg = 0x1005,              ///< Only used in LLVM metadata.
+  DW_OP_LLVM_extract_bits = 0x1006,     ///< Only used in LLVM metadata.
 };
 
 enum LlvmUserLocationAtom {
diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index 7324266172684..d9668dffabec6 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -155,6 +155,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
     return "DW_OP_LLVM_implicit_pointer";
   case DW_OP_LLVM_arg:
     return "DW_OP_LLVM_arg";
+  case DW_OP_LLVM_extract_bits:
+    return "DW_OP_LLVM_extract_bits";
   }
 }
 
@@ -169,6 +171,7 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
       .Case("DW_OP_LLVM_entry_value", DW_OP_LLVM_entry_value)
       .Case("DW_OP_LLVM_implicit_pointer", DW_OP_LLVM_implicit_pointer)
       .Case("DW_OP_LLVM_arg", DW_OP_LLVM_arg)
+      .Case("DW_OP_LLVM_extract_bits", DW_OP_LLVM_extract_bits)
       .Default(0);
 }
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index a74d43897d45b..87beeb7d6bc9a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 
@@ -546,6 +547,37 @@ bool DwarfExpression::addExpression(
       LocationKind = Unknown;
       return true;
     }
+    case dwarf::DW_OP_LLVM_extract_bits: {
+      unsigned SizeInBits = Op->getArg(1);
+      unsigned BitOffset = Op->getArg(0);
+      dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(2));
+
+      // If we have a memory location then dereference to get the value
+      if (isMemoryLocation())
+        emitOp(dwarf::DW_OP_deref);
+
+      // Extract the bits by a shift left (to shift out the bits after what we
+      // want to extract) followed by shift right (to shift the bits to position
+      // 0 and also sign/zero extend). These operations are done in the DWARF
+      // "generic type" whose size is the size of a pointer.
+      unsigned PtrSizeInBytes = CU.getAsmPrinter()->MAI->getCodePointerSize();
+      unsigned LeftShift = PtrSizeInBytes * 8 - (SizeInBits + BitOffset);
+      unsigned RightShift = LeftShift + BitOffset;
+      if (LeftShift) {
+        emitOp(dwarf::DW_OP_constu);
+        emitUnsigned(LeftShift);
+        emitOp(dwarf::DW_OP_shl);
+      }
+      emitOp(dwarf::DW_OP_constu);
+      emitUnsigned(RightShift);
+      emitOp(Encoding == dwarf::DW_ATE_signed ? dwarf::DW_OP_shra
+                                              : dwarf::DW_OP_shr);
+
+      // The value is now at the top of the stack, so set the location to
+      // implicit so that we get a stack_value at the end.
+      LocationKind = Implicit;
+      break;
+    }
     case dwarf::DW_OP_plus_uconst:
       assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_plus_uconst);
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 8b1a21f962b08..4f5935de42bb0 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2468,6 +2468,10 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
       if (Op.getOp() == dwarf::DW_OP_LLVM_convert) {
         Out << FS << Op.getArg(0);
         Out << FS << dwarf::AttributeEncodingString(Op.getArg(1));
+      } else if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits) {
+        Out << FS << Op.getArg(0);
+        Out << FS << Op.getArg(1);
+        Out << FS << dwarf::AttributeEncodingString(Op.getArg(2));
       } else {
         for (unsigned A = 0, AE = Op.getNumArgs(); A != AE; ++A)
           Out << FS << Op.getArg(A);
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 9bd1d7880c9f8..5e69192d5c52f 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1402,6 +1402,8 @@ unsigned DIExpression::ExprOperand::getSize() const {
     return 2;
 
   switch (Op) {
+  case dwarf::DW_OP_LLVM_extract_bits:
+    return 4;
   case dwarf::DW_OP_LLVM_convert:
   case dwarf::DW_OP_LLVM_fragment:
   case dwarf::DW_OP_bregx:
@@ -1474,6 +1476,7 @@ bool DIExpression::isValid() const {
     case dwarf::DW_OP_LLVM_convert:
     case dwarf::DW_OP_LLVM_arg:
     case dwarf::DW_OP_LLVM_tag_offset:
+    case dwarf::DW_OP_LLVM_extract_bits:
     case dwarf::DW_OP_constu:
     case dwarf::DW_OP_plus_uconst:
     case dwarf::DW_OP_plus:
diff --git a/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll b/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
new file mode 100644
index 0000000000000..da0eec669b50c
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
@@ -0,0 +1,99 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump --debug-info %t | FileCheck %s
+
+%struct.struct_t = type { i8 }
+
+ at g = dso_local global %struct.struct_t zeroinitializer, align 1, !dbg !0
+
+; CHECK-LABEL: DW_TAG_subprogram
+; CHECK: DW_AT_name ("test1")
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_location (DW_OP_fbreg -1, DW_OP_deref, DW_OP_constu 0x3d, DW_OP_shl, DW_OP_constu 0x3d, DW_OP_shr, DW_OP_stack_value)
+; CHECK: DW_AT_name ("x")
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_location (DW_OP_fbreg -1, DW_OP_deref, DW_OP_constu 0x39, DW_OP_shl, DW_OP_constu 0x3c, DW_OP_shra, DW_OP_stack_value)
+; CHECK: DW_AT_name ("y")
+
+define i32 @test1() !dbg !13 {
+entry:
+  %0 = alloca %struct.struct_t, align 1
+  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !17, metadata !DIExpression(DW_OP_LLVM_extract_bits, 0, 3, DW_ATE_unsigned)), !dbg !18
+  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !19, metadata !DIExpression(DW_OP_LLVM_extract_bits, 3, 4, DW_ATE_signed)), !dbg !21
+  ret i32 0, !dbg !22
+}
+
+; CHECK-LABEL: DW_TAG_subprogram
+; CHECK: DW_AT_name ("test2")
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0xff, DW_OP_and, DW_OP_constu 0x3d, DW_OP_shl, DW_OP_constu 0x3d, DW_OP_shr, DW_OP_stack_value)
+; CHECK: DW_AT_name ("x")
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0xff, DW_OP_and, DW_OP_constu 0x39, DW_OP_shl, DW_OP_constu 0x3c, DW_OP_shra, DW_OP_stack_value)
+; CHECK: DW_AT_name ("y")
+
+define i8 @test2() !dbg !23 {
+entry:
+  %0 = load i8, ptr @g, align 1
+  tail call void @llvm.dbg.value(metadata i8 %0, metadata !24, metadata !DIExpression(DW_OP_LLVM_extract_bits, 0, 3, DW_ATE_unsigned)), !dbg !25
+  tail call void @llvm.dbg.value(metadata i8 %0, metadata !26, metadata !DIExpression(DW_OP_LLVM_extract_bits, 3, 4, DW_ATE_signed)), !dbg !27
+  ret i8 %0, !dbg !28
+}
+
+; CHECK-LABEL: DW_TAG_subprogram
+; CHECK: DW_AT_name ("test3")
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0x3f, DW_OP_shr, DW_OP_stack_value)
+; CHECK: DW_AT_name ("x")
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0x3f, DW_OP_shra, DW_OP_stack_value)
+; CHECK: DW_AT_name ("y")
+
+define i64 @test3(ptr %p) !dbg !29 {
+entry:
+  %0 = load i64, ptr %p, align 8
+  tail call void @llvm.dbg.value(metadata i64 %0, metadata !33, metadata !DIExpression(DW_OP_LLVM_extract_bits, 63, 1, DW_ATE_unsigned)), !dbg !30
+  tail call void @llvm.dbg.value(metadata i64 %0, metadata !34, metadata !DIExpression(DW_OP_LLVM_extract_bits, 63, 1, DW_ATE_signed)), !dbg !31
+  ret i64 %0, !dbg !32
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!11, !12}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 6, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "DW_OP_bit_piece.cpp", directory: "./")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "struct_t", file: !3, line: 4, baseType: !6)
+!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !3, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !7, identifier: "_ZTS8struct_t")
+!7 = !{!8, !10}
+!8 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !6, file: !3, line: 2, baseType: !9, size: 3, flags: DIFlagBitField, extraData: i64 0)
+!9 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !6, file: !3, line: 3, baseType: !9, size: 4, offset: 3, flags: DIFlagBitField, extraData: i64 0)
+!11 = !{i32 7, !"Dwarf Version", i32 5}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = distinct !DISubprogram(name: "test1", linkageName: "test1", scope: !3, file: !3, line: 8, type: !14, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !16)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!9}
+!16 = !{}
+!17 = !DILocalVariable(name: "x", scope: !13, file: !3, line: 9, type: !9)
+!18 = !DILocation(line: 9, column: 9, scope: !13)
+!19 = !DILocalVariable(name: "y", scope: !13, file: !3, line: 9, type: !20)
+!20 = !DIBasicType(name: "signed int", size: 32, encoding: DW_ATE_signed)
+!21 = !DILocation(line: 9, column: 12, scope: !13)
+!22 = !DILocation(line: 10, column: 3, scope: !13)
+!23 = distinct !DISubprogram(name: "test2", linkageName: "test2", scope: !3, file: !3, line: 8, type: !14, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !16)
+!24 = !DILocalVariable(name: "x", scope: !23, file: !3, line: 9, type: !9)
+!25 = !DILocation(line: 9, column: 9, scope: !23)
+!26 = !DILocalVariable(name: "y", scope: !23, file: !3, line: 9, type: !20)
+!27 = !DILocation(line: 9, column: 12, scope: !23)
+!28 = !DILocation(line: 10, column: 3, scope: !23)
+!29 = distinct !DISubprogram(name: "test3", linkageName: "test3", scope: !3, file: !3, line: 8, type: !14, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !16)
+!30 = !DILocation(line: 9, column: 9, scope: !29)
+!31 = !DILocation(line: 9, column: 12, scope: !29)
+!32 = !DILocation(line: 10, column: 3, scope: !29)
+!33 = !DILocalVariable(name: "x", scope: !29, file: !3, line: 9, type: !9)
+!34 = !DILocalVariable(name: "y", scope: !29, file: !3, line: 9, type: !20)

>From 7dab730797a93d7534a607fc50e0947510a598da Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Mon, 3 Jun 2024 16:12:19 +0100
Subject: [PATCH 2/5] Remove some unnecessary debug metadata in test

---
 .../DebugInfo/X86/DW_OP_LLVM_extract_bits.ll  | 65 +++++++++----------
 1 file changed, 29 insertions(+), 36 deletions(-)

diff --git a/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll b/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
index da0eec669b50c..eeaffb47817a7 100644
--- a/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
+++ b/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
@@ -17,9 +17,9 @@
 define i32 @test1() !dbg !13 {
 entry:
   %0 = alloca %struct.struct_t, align 1
-  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !17, metadata !DIExpression(DW_OP_LLVM_extract_bits, 0, 3, DW_ATE_unsigned)), !dbg !18
-  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !19, metadata !DIExpression(DW_OP_LLVM_extract_bits, 3, 4, DW_ATE_signed)), !dbg !21
-  ret i32 0, !dbg !22
+  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !16, metadata !DIExpression(DW_OP_LLVM_extract_bits, 0, 3, DW_ATE_unsigned)), !dbg !17
+  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !18, metadata !DIExpression(DW_OP_LLVM_extract_bits, 3, 4, DW_ATE_signed)), !dbg !17
+  ret i32 0, !dbg !17
 }
 
 ; CHECK-LABEL: DW_TAG_subprogram
@@ -31,12 +31,12 @@ entry:
 ; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0xff, DW_OP_and, DW_OP_constu 0x39, DW_OP_shl, DW_OP_constu 0x3c, DW_OP_shra, DW_OP_stack_value)
 ; CHECK: DW_AT_name ("y")
 
-define i8 @test2() !dbg !23 {
+define i8 @test2() !dbg !20 {
 entry:
   %0 = load i8, ptr @g, align 1
-  tail call void @llvm.dbg.value(metadata i8 %0, metadata !24, metadata !DIExpression(DW_OP_LLVM_extract_bits, 0, 3, DW_ATE_unsigned)), !dbg !25
-  tail call void @llvm.dbg.value(metadata i8 %0, metadata !26, metadata !DIExpression(DW_OP_LLVM_extract_bits, 3, 4, DW_ATE_signed)), !dbg !27
-  ret i8 %0, !dbg !28
+  tail call void @llvm.dbg.value(metadata i8 %0, metadata !21, metadata !DIExpression(DW_OP_LLVM_extract_bits, 0, 3, DW_ATE_unsigned)), !dbg !22
+  tail call void @llvm.dbg.value(metadata i8 %0, metadata !23, metadata !DIExpression(DW_OP_LLVM_extract_bits, 3, 4, DW_ATE_signed)), !dbg !22
+  ret i8 %0, !dbg !22
 }
 
 ; CHECK-LABEL: DW_TAG_subprogram
@@ -48,12 +48,12 @@ entry:
 ; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0x3f, DW_OP_shra, DW_OP_stack_value)
 ; CHECK: DW_AT_name ("y")
 
-define i64 @test3(ptr %p) !dbg !29 {
+define i64 @test3(ptr %p) !dbg !24 {
 entry:
   %0 = load i64, ptr %p, align 8
-  tail call void @llvm.dbg.value(metadata i64 %0, metadata !33, metadata !DIExpression(DW_OP_LLVM_extract_bits, 63, 1, DW_ATE_unsigned)), !dbg !30
-  tail call void @llvm.dbg.value(metadata i64 %0, metadata !34, metadata !DIExpression(DW_OP_LLVM_extract_bits, 63, 1, DW_ATE_signed)), !dbg !31
-  ret i64 %0, !dbg !32
+  tail call void @llvm.dbg.value(metadata i64 %0, metadata !25, metadata !DIExpression(DW_OP_LLVM_extract_bits, 63, 1, DW_ATE_unsigned)), !dbg !26
+  tail call void @llvm.dbg.value(metadata i64 %0, metadata !27, metadata !DIExpression(DW_OP_LLVM_extract_bits, 63, 1, DW_ATE_signed)), !dbg !26
+  ret i64 %0, !dbg !26
 }
 
 declare void @llvm.dbg.declare(metadata, metadata, metadata)
@@ -63,37 +63,30 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !llvm.module.flags = !{!11, !12}
 
 !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
-!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 6, type: !5, isLocal: false, isDefinition: true)
+!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, type: !5, isLocal: false, isDefinition: true)
 !2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
 !3 = !DIFile(filename: "DW_OP_bit_piece.cpp", directory: "./")
 !4 = !{!0}
-!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "struct_t", file: !3, line: 4, baseType: !6)
-!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !3, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !7, identifier: "_ZTS8struct_t")
+!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "struct_t", file: !3, baseType: !6)
+!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !3, size: 8, flags: DIFlagTypePassByValue, elements: !7, identifier: "_ZTS8struct_t")
 !7 = !{!8, !10}
-!8 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !6, file: !3, line: 2, baseType: !9, size: 3, flags: DIFlagBitField, extraData: i64 0)
+!8 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !6, file: !3, baseType: !9, size: 3, flags: DIFlagBitField, extraData: i64 0)
 !9 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
-!10 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !6, file: !3, line: 3, baseType: !9, size: 4, offset: 3, flags: DIFlagBitField, extraData: i64 0)
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !6, file: !3, baseType: !9, size: 4, offset: 3, flags: DIFlagBitField, extraData: i64 0)
 !11 = !{i32 7, !"Dwarf Version", i32 5}
 !12 = !{i32 2, !"Debug Info Version", i32 3}
-!13 = distinct !DISubprogram(name: "test1", linkageName: "test1", scope: !3, file: !3, line: 8, type: !14, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !16)
+!13 = distinct !DISubprogram(name: "test1", linkageName: "test1", scope: !3, file: !3, type: !14, spFlags: DISPFlagDefinition, unit: !2)
 !14 = !DISubroutineType(types: !15)
 !15 = !{!9}
-!16 = !{}
-!17 = !DILocalVariable(name: "x", scope: !13, file: !3, line: 9, type: !9)
-!18 = !DILocation(line: 9, column: 9, scope: !13)
-!19 = !DILocalVariable(name: "y", scope: !13, file: !3, line: 9, type: !20)
-!20 = !DIBasicType(name: "signed int", size: 32, encoding: DW_ATE_signed)
-!21 = !DILocation(line: 9, column: 12, scope: !13)
-!22 = !DILocation(line: 10, column: 3, scope: !13)
-!23 = distinct !DISubprogram(name: "test2", linkageName: "test2", scope: !3, file: !3, line: 8, type: !14, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !16)
-!24 = !DILocalVariable(name: "x", scope: !23, file: !3, line: 9, type: !9)
-!25 = !DILocation(line: 9, column: 9, scope: !23)
-!26 = !DILocalVariable(name: "y", scope: !23, file: !3, line: 9, type: !20)
-!27 = !DILocation(line: 9, column: 12, scope: !23)
-!28 = !DILocation(line: 10, column: 3, scope: !23)
-!29 = distinct !DISubprogram(name: "test3", linkageName: "test3", scope: !3, file: !3, line: 8, type: !14, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !16)
-!30 = !DILocation(line: 9, column: 9, scope: !29)
-!31 = !DILocation(line: 9, column: 12, scope: !29)
-!32 = !DILocation(line: 10, column: 3, scope: !29)
-!33 = !DILocalVariable(name: "x", scope: !29, file: !3, line: 9, type: !9)
-!34 = !DILocalVariable(name: "y", scope: !29, file: !3, line: 9, type: !20)
+!16 = !DILocalVariable(name: "x", scope: !13, file: !3, type: !9)
+!17 = !DILocation(line: 0, scope: !13)
+!18 = !DILocalVariable(name: "y", scope: !13, file: !3, type: !19)
+!19 = !DIBasicType(name: "signed int", size: 32, encoding: DW_ATE_signed)
+!20 = distinct !DISubprogram(name: "test2", linkageName: "test2", scope: !3, file: !3, type: !14, spFlags: DISPFlagDefinition, unit: !2)
+!21 = !DILocalVariable(name: "x", scope: !20, file: !3, type: !9)
+!22 = !DILocation(line: 0, scope: !20)
+!23 = !DILocalVariable(name: "y", scope: !20, file: !3, type: !19)
+!24 = distinct !DISubprogram(name: "test3", linkageName: "test3", scope: !3, file: !3, type: !14, spFlags: DISPFlagDefinition, unit: !2)
+!25 = !DILocalVariable(name: "x", scope: !24, file: !3, type: !9)
+!26 = !DILocation(line: 0, scope: !24)
+!27 = !DILocalVariable(name: "y", scope: !24, file: !3, type: !19)

>From 083dc52f7fff76e095f2242a63356599872c3fff Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Thu, 6 Jun 2024 14:20:01 +0100
Subject: [PATCH 3/5] Put sign information in the opcode instead of as an
 operand.

---
 llvm/docs/LangRef.rst                            | 16 +++++++++-------
 llvm/include/llvm/BinaryFormat/Dwarf.h           | 15 ++++++++-------
 llvm/lib/BinaryFormat/Dwarf.cpp                  |  9 ++++++---
 llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp  |  8 ++++----
 llvm/lib/IR/AsmWriter.cpp                        |  4 ----
 llvm/lib/IR/DebugInfoMetadata.cpp                |  7 ++++---
 .../DebugInfo/X86/DW_OP_LLVM_extract_bits.ll     | 12 ++++++------
 7 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7b4e91d09f342..d1edbb48b654c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -6312,13 +6312,15 @@ The current supported opcode vocabulary is limited:
   (``16`` and ``DW_ATE_signed`` here, respectively) to which the top of the
   expression stack is to be converted. Maps into a ``DW_OP_convert`` operation
   that references a base type constructed from the supplied values.
-- ``DW_OP_LLVM_extract_bits, 16, 8, DW_ATE_signed`` specifies the offset, size,
-  and encoding (``16``, ``8``, and ``DW_ATE_signed`` here, respectively) of bits
-  that are to be extracted from the value at the top of the expression stack.
-  If the top of the expression stack is a memory location then these bits are
-  extracted from the value pointed to by that memory location. Maps into a
-  ``DW_OP_shl`` followed by ``DW_OP_shr`` or ``DW_OP_shra`` (depending on
-  encoding).
+- ``DW_OP_LLVM_extract_bits_sext, 16, 8`` specifies the offset and size
+  (``16`` and ``8`` here, respectively) of bits that are to be extracted and
+  sign-extended from the value at the top of the expression stack. If the top of
+  the expression stack is a memory location then these bits are extracted from
+  the value pointed to by that memory location. Maps into a ``DW_OP_shl``
+  followed by ``DW_OP_shra``.
+- ``DW_OP_LLVM_extract_bits_zext`` behaves similarly to
+  ``DW_OP_LLVM_extract_bits_sext``, but zero-extends instead of sign-extending.
+  Maps into a ``DW_OP_shl`` followed by ``DW_OP_shr``.
 - ``DW_OP_LLVM_tag_offset, tag_offset`` specifies that a memory tag should be
   optionally applied to the pointer. The memory tag is derived from the
   given tag offset in an implementation-defined manner.
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index 7ae265484be58..607f3eb9d4c22 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -138,13 +138,14 @@ enum LocationAtom {
 #include "llvm/BinaryFormat/Dwarf.def"
   DW_OP_lo_user = 0xe0,
   DW_OP_hi_user = 0xff,
-  DW_OP_LLVM_fragment = 0x1000,         ///< Only used in LLVM metadata.
-  DW_OP_LLVM_convert = 0x1001,          ///< Only used in LLVM metadata.
-  DW_OP_LLVM_tag_offset = 0x1002,       ///< Only used in LLVM metadata.
-  DW_OP_LLVM_entry_value = 0x1003,      ///< Only used in LLVM metadata.
-  DW_OP_LLVM_implicit_pointer = 0x1004, ///< Only used in LLVM metadata.
-  DW_OP_LLVM_arg = 0x1005,              ///< Only used in LLVM metadata.
-  DW_OP_LLVM_extract_bits = 0x1006,     ///< Only used in LLVM metadata.
+  DW_OP_LLVM_fragment = 0x1000,          ///< Only used in LLVM metadata.
+  DW_OP_LLVM_convert = 0x1001,           ///< Only used in LLVM metadata.
+  DW_OP_LLVM_tag_offset = 0x1002,        ///< Only used in LLVM metadata.
+  DW_OP_LLVM_entry_value = 0x1003,       ///< Only used in LLVM metadata.
+  DW_OP_LLVM_implicit_pointer = 0x1004,  ///< Only used in LLVM metadata.
+  DW_OP_LLVM_arg = 0x1005,               ///< Only used in LLVM metadata.
+  DW_OP_LLVM_extract_bits_sext = 0x1006, ///< Only used in LLVM metadata.
+  DW_OP_LLVM_extract_bits_zext = 0x1007, ///< Only used in LLVM metadata.
 };
 
 enum LlvmUserLocationAtom {
diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index d9668dffabec6..0bf4f201dbe10 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -155,8 +155,10 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
     return "DW_OP_LLVM_implicit_pointer";
   case DW_OP_LLVM_arg:
     return "DW_OP_LLVM_arg";
-  case DW_OP_LLVM_extract_bits:
-    return "DW_OP_LLVM_extract_bits";
+  case DW_OP_LLVM_extract_bits_sext:
+    return "DW_OP_LLVM_extract_bits_sext";
+  case DW_OP_LLVM_extract_bits_zext:
+    return "DW_OP_LLVM_extract_bits_zext";
   }
 }
 
@@ -171,7 +173,8 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
       .Case("DW_OP_LLVM_entry_value", DW_OP_LLVM_entry_value)
       .Case("DW_OP_LLVM_implicit_pointer", DW_OP_LLVM_implicit_pointer)
       .Case("DW_OP_LLVM_arg", DW_OP_LLVM_arg)
-      .Case("DW_OP_LLVM_extract_bits", DW_OP_LLVM_extract_bits)
+      .Case("DW_OP_LLVM_extract_bits_sext", DW_OP_LLVM_extract_bits_sext)
+      .Case("DW_OP_LLVM_extract_bits_zext", DW_OP_LLVM_extract_bits_zext)
       .Default(0);
 }
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 87beeb7d6bc9a..cc96d3c481f70 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -547,10 +547,10 @@ bool DwarfExpression::addExpression(
       LocationKind = Unknown;
       return true;
     }
-    case dwarf::DW_OP_LLVM_extract_bits: {
+    case dwarf::DW_OP_LLVM_extract_bits_sext:
+    case dwarf::DW_OP_LLVM_extract_bits_zext: {
       unsigned SizeInBits = Op->getArg(1);
       unsigned BitOffset = Op->getArg(0);
-      dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(2));
 
       // If we have a memory location then dereference to get the value
       if (isMemoryLocation())
@@ -570,8 +570,8 @@ bool DwarfExpression::addExpression(
       }
       emitOp(dwarf::DW_OP_constu);
       emitUnsigned(RightShift);
-      emitOp(Encoding == dwarf::DW_ATE_signed ? dwarf::DW_OP_shra
-                                              : dwarf::DW_OP_shr);
+      emitOp(OpNum == dwarf::DW_OP_LLVM_extract_bits_sext ? dwarf::DW_OP_shra
+                                                          : dwarf::DW_OP_shr);
 
       // The value is now at the top of the stack, so set the location to
       // implicit so that we get a stack_value at the end.
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 4f5935de42bb0..8b1a21f962b08 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2468,10 +2468,6 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
       if (Op.getOp() == dwarf::DW_OP_LLVM_convert) {
         Out << FS << Op.getArg(0);
         Out << FS << dwarf::AttributeEncodingString(Op.getArg(1));
-      } else if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits) {
-        Out << FS << Op.getArg(0);
-        Out << FS << Op.getArg(1);
-        Out << FS << dwarf::AttributeEncodingString(Op.getArg(2));
       } else {
         for (unsigned A = 0, AE = Op.getNumArgs(); A != AE; ++A)
           Out << FS << Op.getArg(A);
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 5e69192d5c52f..2b45932093f0f 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1402,10 +1402,10 @@ unsigned DIExpression::ExprOperand::getSize() const {
     return 2;
 
   switch (Op) {
-  case dwarf::DW_OP_LLVM_extract_bits:
-    return 4;
   case dwarf::DW_OP_LLVM_convert:
   case dwarf::DW_OP_LLVM_fragment:
+  case dwarf::DW_OP_LLVM_extract_bits_sext:
+  case dwarf::DW_OP_LLVM_extract_bits_zext:
   case dwarf::DW_OP_bregx:
     return 3;
   case dwarf::DW_OP_constu:
@@ -1476,7 +1476,8 @@ bool DIExpression::isValid() const {
     case dwarf::DW_OP_LLVM_convert:
     case dwarf::DW_OP_LLVM_arg:
     case dwarf::DW_OP_LLVM_tag_offset:
-    case dwarf::DW_OP_LLVM_extract_bits:
+    case dwarf::DW_OP_LLVM_extract_bits_sext:
+    case dwarf::DW_OP_LLVM_extract_bits_zext:
     case dwarf::DW_OP_constu:
     case dwarf::DW_OP_plus_uconst:
     case dwarf::DW_OP_plus:
diff --git a/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll b/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
index eeaffb47817a7..6fbf788e4cf94 100644
--- a/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
+++ b/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
@@ -17,8 +17,8 @@
 define i32 @test1() !dbg !13 {
 entry:
   %0 = alloca %struct.struct_t, align 1
-  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !16, metadata !DIExpression(DW_OP_LLVM_extract_bits, 0, 3, DW_ATE_unsigned)), !dbg !17
-  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !18, metadata !DIExpression(DW_OP_LLVM_extract_bits, 3, 4, DW_ATE_signed)), !dbg !17
+  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !16, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 3)), !dbg !17
+  tail call void @llvm.dbg.declare(metadata ptr %0, metadata !18, metadata !DIExpression(DW_OP_LLVM_extract_bits_sext, 3, 4)), !dbg !17
   ret i32 0, !dbg !17
 }
 
@@ -34,8 +34,8 @@ entry:
 define i8 @test2() !dbg !20 {
 entry:
   %0 = load i8, ptr @g, align 1
-  tail call void @llvm.dbg.value(metadata i8 %0, metadata !21, metadata !DIExpression(DW_OP_LLVM_extract_bits, 0, 3, DW_ATE_unsigned)), !dbg !22
-  tail call void @llvm.dbg.value(metadata i8 %0, metadata !23, metadata !DIExpression(DW_OP_LLVM_extract_bits, 3, 4, DW_ATE_signed)), !dbg !22
+  tail call void @llvm.dbg.value(metadata i8 %0, metadata !21, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 3)), !dbg !22
+  tail call void @llvm.dbg.value(metadata i8 %0, metadata !23, metadata !DIExpression(DW_OP_LLVM_extract_bits_sext, 3, 4)), !dbg !22
   ret i8 %0, !dbg !22
 }
 
@@ -51,8 +51,8 @@ entry:
 define i64 @test3(ptr %p) !dbg !24 {
 entry:
   %0 = load i64, ptr %p, align 8
-  tail call void @llvm.dbg.value(metadata i64 %0, metadata !25, metadata !DIExpression(DW_OP_LLVM_extract_bits, 63, 1, DW_ATE_unsigned)), !dbg !26
-  tail call void @llvm.dbg.value(metadata i64 %0, metadata !27, metadata !DIExpression(DW_OP_LLVM_extract_bits, 63, 1, DW_ATE_signed)), !dbg !26
+  tail call void @llvm.dbg.value(metadata i64 %0, metadata !25, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 63, 1)), !dbg !26
+  tail call void @llvm.dbg.value(metadata i64 %0, metadata !27, metadata !DIExpression(DW_OP_LLVM_extract_bits_sext, 63, 1)), !dbg !26
   ret i64 %0, !dbg !26
 }
 

>From 342011f96768a0827198482a6884325534acd5ae Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Thu, 6 Jun 2024 16:39:16 +0100
Subject: [PATCH 4/5] Use regex in tests

---
 llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll b/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
index 6fbf788e4cf94..18fdfa579b9f1 100644
--- a/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
+++ b/llvm/test/DebugInfo/X86/DW_OP_LLVM_extract_bits.ll
@@ -25,10 +25,10 @@ entry:
 ; CHECK-LABEL: DW_TAG_subprogram
 ; CHECK: DW_AT_name ("test2")
 ; CHECK: DW_TAG_variable
-; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0xff, DW_OP_and, DW_OP_constu 0x3d, DW_OP_shl, DW_OP_constu 0x3d, DW_OP_shr, DW_OP_stack_value)
+; CHECK: DW_AT_location (DW_OP_breg0 {{R[^+]+}}+0, DW_OP_constu 0xff, DW_OP_and, DW_OP_constu 0x3d, DW_OP_shl, DW_OP_constu 0x3d, DW_OP_shr, DW_OP_stack_value)
 ; CHECK: DW_AT_name ("x")
 ; CHECK: DW_TAG_variable
-; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0xff, DW_OP_and, DW_OP_constu 0x39, DW_OP_shl, DW_OP_constu 0x3c, DW_OP_shra, DW_OP_stack_value)
+; CHECK: DW_AT_location (DW_OP_breg0 {{R[^+]+}}+0, DW_OP_constu 0xff, DW_OP_and, DW_OP_constu 0x39, DW_OP_shl, DW_OP_constu 0x3c, DW_OP_shra, DW_OP_stack_value)
 ; CHECK: DW_AT_name ("y")
 
 define i8 @test2() !dbg !20 {
@@ -42,10 +42,10 @@ entry:
 ; CHECK-LABEL: DW_TAG_subprogram
 ; CHECK: DW_AT_name ("test3")
 ; CHECK: DW_TAG_variable
-; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0x3f, DW_OP_shr, DW_OP_stack_value)
+; CHECK: DW_AT_location (DW_OP_breg0 {{R[^+]+}}+0, DW_OP_constu 0x3f, DW_OP_shr, DW_OP_stack_value)
 ; CHECK: DW_AT_name ("x")
 ; CHECK: DW_TAG_variable
-; CHECK: DW_AT_location (DW_OP_breg0 RAX+0, DW_OP_constu 0x3f, DW_OP_shra, DW_OP_stack_value)
+; CHECK: DW_AT_location (DW_OP_breg0 {{R[^+]+}}+0, DW_OP_constu 0x3f, DW_OP_shra, DW_OP_stack_value)
 ; CHECK: DW_AT_name ("y")
 
 define i64 @test3(ptr %p) !dbg !24 {

>From eaea27c9a2ff0238b138cad42a94d4b5f87cc86d Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Wed, 29 May 2024 16:02:10 +0100
Subject: [PATCH 5/5] [DebugInfo] Handle DW_OP_LLVM_extract_bits in SROA

Changes to make SROA handle DW_OP_LLVM_extract_bits

TODO: Better commit message
TODO: Handling of sign mismatch in getActiveBits may not be right
TODO: Maybe I can do something about the FIXMEs
TODO: Adjusting getFragmentSizeInBits is probably wrong; instead use
      getActiveBits in valueCoversEntireFragment in Transforms/Utils/Local.cpp
---
 llvm/include/llvm/IR/DebugInfoMetadata.h      |   6 +
 llvm/lib/IR/DebugInfoMetadata.cpp             |  55 ++++++-
 llvm/lib/IR/DebugProgramInstruction.cpp       |   4 +-
 llvm/lib/IR/IntrinsicInst.cpp                 |   4 +-
 .../DebugInfo/Generic/sroa-extract-bits.ll    | 149 ++++++++++++++++++
 5 files changed, 209 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/DebugInfo/Generic/sroa-extract-bits.ll

diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index 18873a551595a..0007d0a0e86d2 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -2903,6 +2903,12 @@ class DIExpression : public MDNode {
     }
   };
 
+  /// Return the number of bits that have an active value, i.e. those that
+  /// aren't known to be zero- or sign-extension bits (depending on the
+  /// signedness of Var) and which are within the size of this fragment (if it
+  /// is one). If nothing can be deduced, this returns the size of Var.
+  std::optional<uint64_t> getActiveBits(DIVariable *Var);
+
   /// Retrieve the details of this fragment expression.
   static std::optional<FragmentInfo> getFragmentInfo(expr_op_iterator Start,
                                                      expr_op_iterator End);
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 2b45932093f0f..dce2e3037747b 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1679,6 +1679,26 @@ DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) {
   return std::nullopt;
 }
 
+std::optional<uint64_t> DIExpression::getActiveBits(DIVariable *Var) {
+  std::optional<uint64_t> BitWidth = Var->getSizeInBits();
+  for (auto Op : expr_ops()) {
+    bool IsZExt = Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext;
+    bool IsSExt = Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext;
+    if (!IsZExt && !IsSExt && Op.getOp() != dwarf::DW_OP_LLVM_fragment)
+      continue;
+    // If the kind of extension doesn't match the signedness of Var, reset the
+    // width to the full size of Var and move on to the next operation.
+    if ((IsZExt && Var->getSignedness() != DIBasicType::Signedness::Unsigned) ||
+        (IsSExt && Var->getSignedness() != DIBasicType::Signedness::Signed)) {
+      BitWidth = Var->getSizeInBits();
+      continue;
+    }
+    // Otherwise clamp the width to the size operand of the extract/fragment.
+    BitWidth = BitWidth ? std::min(*BitWidth, Op.getArg(1)) : Op.getArg(1);
+  }
+  return BitWidth;
+}
+
 void DIExpression::appendOffset(SmallVectorImpl<uint64_t> &Ops,
                                 int64_t Offset) {
   if (Offset > 0) {
@@ -1931,6 +1951,8 @@ std::optional<DIExpression *> DIExpression::createFragmentExpression(
   // Track whether it's safe to split the value at the top of the DWARF stack,
   // assuming that it'll be used as an implicit location value.
   bool CanSplitValue = true;
+  // Track whether we need to add a fragment expression to the end of Expr.
+  bool EmitFragment = true;
   // Copy over the expression, but leave off any trailing DW_OP_LLVM_fragment.
   if (Expr) {
     for (auto Op : Expr->expr_ops()) {
@@ -1966,6 +1988,11 @@ std::optional<DIExpression *> DIExpression::createFragmentExpression(
           return std::nullopt;
         break;
       case dwarf::DW_OP_LLVM_fragment: {
+        // If we've decided we don't need a fragment then give up if we see that
+        // there's already a fragment expression.
+        // FIXME: We could probably do better here
+        if (!EmitFragment)
+          return std::nullopt;
         // Make the new offset point into the existing fragment.
         uint64_t FragmentOffsetInBits = Op.getArg(0);
         uint64_t FragmentSizeInBits = Op.getArg(1);
@@ -1975,15 +2002,37 @@ std::optional<DIExpression *> DIExpression::createFragmentExpression(
         OffsetInBits += FragmentOffsetInBits;
         continue;
       }
+      case dwarf::DW_OP_LLVM_extract_bits_zext:
+      case dwarf::DW_OP_LLVM_extract_bits_sext: {
+        // If we're extracting bits from inside of the fragment that we're
+        // creating then we don't have a fragment after all, and just need to
+        // adjust the offset that we're extracting from.
+        uint64_t ExtractOffsetInBits = Op.getArg(0);
+        uint64_t ExtractSizeInBits = Op.getArg(1);
+        if (ExtractOffsetInBits >= OffsetInBits &&
+            ExtractOffsetInBits + ExtractSizeInBits <= OffsetInBits + SizeInBits) {
+          Ops.push_back(Op.getOp());
+          Ops.push_back(ExtractOffsetInBits - OffsetInBits);
+          Ops.push_back(ExtractSizeInBits);
+          EmitFragment = false;
+          continue;
+        }
+        // If the extracted bits aren't fully contained within the fragment then
+        // give up.
+        // FIXME: We could probably do better here
+        return std::nullopt;
+      }
       }
       Op.appendToVector(Ops);
     }
   }
   assert((!Expr->isImplicit() || CanSplitValue) && "Expr can't be split");
   assert(Expr && "Unknown DIExpression");
-  Ops.push_back(dwarf::DW_OP_LLVM_fragment);
-  Ops.push_back(OffsetInBits);
-  Ops.push_back(SizeInBits);
+  if (EmitFragment) {
+    Ops.push_back(dwarf::DW_OP_LLVM_fragment);
+    Ops.push_back(OffsetInBits);
+    Ops.push_back(SizeInBits);
+  }
   return DIExpression::get(Expr->getContext(), Ops);
 }
 
diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp
index fbca7cdfcf3f5..552fb3a3d521d 100644
--- a/llvm/lib/IR/DebugProgramInstruction.cpp
+++ b/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -372,9 +372,7 @@ bool DbgVariableRecord::isKillLocation() const {
 }
 
 std::optional<uint64_t> DbgVariableRecord::getFragmentSizeInBits() const {
-  if (auto Fragment = getExpression()->getFragmentInfo())
-    return Fragment->SizeInBits;
-  return getVariable()->getSizeInBits();
+  return getExpression()->getActiveBits(getVariable());
 }
 
 DbgRecord *DbgRecord::clone() const {
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index e17755c8ad57b..e007c9b80e1f5 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -196,9 +196,7 @@ void DbgVariableIntrinsic::addVariableLocationOps(ArrayRef<Value *> NewValues,
 }
 
 std::optional<uint64_t> DbgVariableIntrinsic::getFragmentSizeInBits() const {
-  if (auto Fragment = getExpression()->getFragmentInfo())
-    return Fragment->SizeInBits;
-  return getVariable()->getSizeInBits();
+  return getExpression()->getActiveBits(getVariable());
 }
 
 Value *DbgAssignIntrinsic::getAddress() const {
diff --git a/llvm/test/DebugInfo/Generic/sroa-extract-bits.ll b/llvm/test/DebugInfo/Generic/sroa-extract-bits.ll
new file mode 100644
index 0000000000000..54082feac4654
--- /dev/null
+++ b/llvm/test/DebugInfo/Generic/sroa-extract-bits.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes='sroa<preserve-cfg>' %s -S | FileCheck %s
+; RUN: opt -passes='sroa<modify-cfg>' %s -S | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+
+; The alloca is split into two fragments: variable x is in the first, variables y and z are in the second
+define i8 @test1(i32 %arg) {
+; CHECK-LABEL: define i8 @test1(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[ARG]] to i8
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i8 [[PTR_SROA_0_0_EXTRACT_TRUNC]], metadata [[META2:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg [[DBG7:![0-9]+]]
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[ARG]], 8
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[PTR_SROA_2_0_EXTRACT_SHIFT]] to i24
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i24 [[PTR_SROA_2_0_EXTRACT_TRUNC]], metadata [[META8:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 8, 16)), !dbg [[DBG7]]
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i24 [[PTR_SROA_2_0_EXTRACT_TRUNC]], metadata [[META9:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg [[DBG7]]
+; CHECK-NEXT:    ret i8 [[PTR_SROA_0_0_EXTRACT_TRUNC]]
+;
+entry:
+  %ptr = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !2, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg !7
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !9, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 8, 8)), !dbg !7
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !8, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 16, 16)), !dbg !7
+  store i32 %arg, ptr %ptr, align 4
+  %ret = load i8, ptr %ptr, align 4
+  ret i8 %ret
+}
+
+; The alloca is split into three fragments corresponding to the variables x, y, z
+define i8 @test2(i32 %arg1, i8 %arg2) {
+; CHECK-LABEL: define i8 @test2(
+; CHECK-SAME: i32 [[ARG1:%.*]], i8 [[ARG2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[ARG1]] to i8
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i8 [[PTR_SROA_0_0_EXTRACT_TRUNC]], metadata [[META2]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg [[DBG7]]
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[ARG1]], 8
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[PTR_SROA_2_0_EXTRACT_SHIFT]] to i16
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i16 [[PTR_SROA_2_0_EXTRACT_TRUNC]], metadata [[META9]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 16)), !dbg [[DBG7]]
+; CHECK-NEXT:    [[PTR_SROA_21_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[ARG1]], 24
+; CHECK-NEXT:    [[PTR_SROA_21_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[PTR_SROA_21_0_EXTRACT_SHIFT]] to i8
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i8 [[PTR_SROA_21_0_EXTRACT_TRUNC]], metadata [[META8]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg [[DBG7]]
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i8 [[ARG2]], metadata [[META8]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg [[DBG7]]
+; CHECK-NEXT:    ret i8 [[PTR_SROA_0_0_EXTRACT_TRUNC]]
+;
+entry:
+  %ptr = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !2, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg !7
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !9, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 8, 16)), !dbg !7
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !8, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 24, 8)), !dbg !7
+  store i32 %arg1, ptr %ptr, align 4
+  %gep = getelementptr i8, ptr %ptr, i32 3
+  store i8 %arg2, ptr %gep, align 1
+  %ret = load i8, ptr %ptr, align 4
+  ret i8 %ret
+}
+
+; The alloca is split into two fragments, with variable x being half in one and half in the other
+; FIXME: We currently generate no debug info for x in this case
+define i8 @test3(i32 %arg) {
+; CHECK-LABEL: define i8 @test3(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[ARG]] to i8
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[ARG]], 8
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[PTR_SROA_2_0_EXTRACT_SHIFT]] to i24
+; CHECK-NEXT:    ret i8 [[PTR_SROA_0_0_EXTRACT_TRUNC]]
+;
+entry:
+  %ptr = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !2, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 16)), !dbg !7
+  store i32 %arg, ptr %ptr, align 4
+  %ret = load i8, ptr %ptr, align 4
+  ret i8 %ret
+}
+
+; The alloca is split into two fragments, with variable y being half in one and half in the other
+; FIXME: We currently generate no debug info for y in this case
+define i16 @test4(i32 %arg) {
+; CHECK-LABEL: define i16 @test4(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[ARG]] to i16
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i16 [[PTR_SROA_0_0_EXTRACT_TRUNC]], metadata [[META2]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg [[DBG7]]
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[ARG]], 16
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[PTR_SROA_2_0_EXTRACT_SHIFT]] to i16
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i16 [[PTR_SROA_2_0_EXTRACT_TRUNC]], metadata [[META8]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 8, 8)), !dbg [[DBG7]]
+; CHECK-NEXT:    ret i16 [[PTR_SROA_0_0_EXTRACT_TRUNC]]
+;
+entry:
+  %ptr = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !2, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg !7
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !9, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 8, 16)), !dbg !7
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !8, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 24, 8)), !dbg !7
+  store i32 %arg, ptr %ptr, align 4
+  %ret = load i16, ptr %ptr, align 4
+  ret i16 %ret
+}
+
+; Struct where the first element is an ordinary char, the second is a bitfield of two elements, and the third is padding
+%struct.struct_t = type <{ i8, i16, i8 }>
+define i8 @test5(i32 %arg) {
+; CHECK-LABEL: define i8 @test5(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[ARG]] to i8
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i8 [[PTR_SROA_0_0_EXTRACT_TRUNC]], metadata [[META10:![0-9]+]], metadata !DIExpression()), !dbg [[DBG7]]
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[ARG]], 8
+; CHECK-NEXT:    [[PTR_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[PTR_SROA_2_0_EXTRACT_SHIFT]] to i24
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i24 [[PTR_SROA_2_0_EXTRACT_TRUNC]], metadata [[META8]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 8, 8)), !dbg [[DBG7]]
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i24 [[PTR_SROA_2_0_EXTRACT_TRUNC]], metadata [[META9]], metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 0, 8)), !dbg [[DBG7]]
+; CHECK-NEXT:    ret i8 [[PTR_SROA_0_0_EXTRACT_TRUNC]]
+;
+entry:
+  %ptr = alloca %struct.struct_t, align 4
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !10, metadata !DIExpression()), !dbg !7
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !9, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 8, 8)), !dbg !7
+  call void @llvm.dbg.declare(metadata ptr %ptr, metadata !8, metadata !DIExpression(DW_OP_LLVM_extract_bits_zext, 16, 8)), !dbg !7
+  store i32 %arg, ptr %ptr, align 4
+  %ret = load i8, ptr %ptr, align 4
+  ret i8 %ret
+}
+
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 7, !"Dwarf Version", i32 5}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !DILocalVariable(name: "x", scope: !3, type: !6)
+!3 = distinct !DISubprogram(name: "test", unit: !4)
+!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, emissionKind: FullDebug)
+!5 = !DIFile(filename: "dbg-bit-piece.cpp", directory: "")
+!6 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!7 = !DILocation(line: 0, column: 0, scope: !3)
+!8 = !DILocalVariable(name: "z", scope: !3, type: !6)
+!9 = !DILocalVariable(name: "y", scope: !3, type: !6)
+!10 = !DILocalVariable(name: "x", scope: !3, type: !11)
+!11 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+
+;.
+; CHECK: [[META2]] = !DILocalVariable(name: "x", scope: [[META3:![0-9]+]], type: [[META6:![0-9]+]])
+; CHECK: [[META3]] = distinct !DISubprogram(name: "test", scope: null, spFlags: DISPFlagDefinition, unit: [[META4:![0-9]+]])
+; CHECK: [[META4]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META5:![0-9]+]], isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+; CHECK: [[META5]] = !DIFile(filename: "dbg-bit-piece.cpp", directory: "")
+; CHECK: [[META6]] = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+; CHECK: [[DBG7]] = !DILocation(line: 0, scope: [[META3]])
+; CHECK: [[META8]] = !DILocalVariable(name: "z", scope: [[META3]], type: [[META6]])
+; CHECK: [[META9]] = !DILocalVariable(name: "y", scope: [[META3]], type: [[META6]])
+; CHECK: [[META10]] = !DILocalVariable(name: "x", scope: [[META3]], type: [[META11:![0-9]+]])
+; CHECK: [[META11]] = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+;.



More information about the llvm-commits mailing list