[llvm] ee72b17 - Fix UB in DwarfExpression::emitLegacyZExt()

Wed Jan 26 13:08:47 PST 2022

Author: Adrian Prantl
Date: 2022-01-26T13:08:35-08:00
New Revision: ee72b173869fd78c49bdd5b85a94fc35b8332ba6

URL: https://github.com/llvm/llvm-project/commit/ee72b173869fd78c49bdd5b85a94fc35b8332ba6
DIFF: https://github.com/llvm/llvm-project/commit/ee72b173869fd78c49bdd5b85a94fc35b8332ba6.diff

LOG: Fix UB in DwarfExpression::emitLegacyZExt()

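A shift-left by more than 63 bits triggers a UBSAN failure, because
evaluating `1ULL << FromBits` in the compiler is undefined behavior for
FromBits >= 64. This patch kicks the can down the road (to the consumer)
by emitting the shift computation itself as DWARF operations, which is
also a more compact representation for wide masks.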

Relanding (I had accidentally pushed an earlier version of the patch).

Differential Revision: https://reviews.llvm.org/D118183

Added: 
    

Modified: 
    llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
    llvm/test/DebugInfo/X86/convert-debugloc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 37407c98e75f8..ee932d1051079 100644

--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -681,9 +681,25 @@ void DwarfExpression::emitLegacySExt(unsigned FromBits) {
 }
 
 void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
-  // (X & (1 << FromBits - 1))
-  emitOp(dwarf::DW_OP_constu);
-  emitUnsigned((1ULL << FromBits) - 1);
+  // Heuristic to decide the most efficient encoding.
+  // A ULEB can encode 7 1-bits per byte.
+  if (FromBits / 7 < 1+1+1+1+1) {
+    // (X & ((1 << FromBits) - 1))
+    emitOp(dwarf::DW_OP_constu);
+    emitUnsigned((1ULL << FromBits) - 1);
+  } else {
+    // Note that the DWARF 4 stack consists of pointer-sized elements,
+    // so technically it doesn't make sense to shift left more than 64
+    // bits. We leave that for the consumer to decide though. LLDB for
+    // example uses APInt for the stack elements and can still deal
+    // with this.
+    emitOp(dwarf::DW_OP_lit1);
+    emitOp(dwarf::DW_OP_constu);
+    emitUnsigned(FromBits);
+    emitOp(dwarf::DW_OP_shl);
+    emitOp(dwarf::DW_OP_lit1);
+    emitOp(dwarf::DW_OP_minus);
+  }
   emitOp(dwarf::DW_OP_and);
 }
 

diff  --git a/llvm/test/DebugInfo/X86/convert-debugloc.ll b/llvm/test/DebugInfo/X86/convert-debugloc.ll
index 21e41dcc4c2a9..de0857d538327 100644
--- a/llvm/test/DebugInfo/X86/convert-debugloc.ll
+++ b/llvm/test/DebugInfo/X86/convert-debugloc.ll
@@ -27,7 +27,7 @@
 ; RUN:   | FileCheck %s --check-prefix=VERBOSE --check-prefix=CONV "--implicit-check-not={{DW_TAG|NULL}}"
 
 
-; SPLITCONV: Compile Unit:{{.*}} DWO_id = 0x62f17241069b1fa3
+; SPLITCONV: Compile Unit:{{.*}} DWO_id = 0x24191746f389535f
 ; SPLIT: DW_TAG_skeleton_unit
 
 ; CONV: DW_TAG_compile_unit
@@ -41,6 +41,8 @@
 ; CONV-NEXT:DW_AT_encoding {{.*}}DW_ATE_signed)
 ; CONV-NEXT:DW_AT_byte_size {{.*}}0x04)
 ; CONV-NOT: DW_AT
+; CONV:   DW_TAG_base_type
+; CONV:   DW_TAG_base_type
 ; CONV:   DW_TAG_subprogram
 ; CONV:     DW_TAG_formal_parameter
 ; CONV:     DW_TAG_variable
@@ -50,11 +52,14 @@
 ; VERBOSE-SAME: [[SIG32]] ->
 ; CONV-SAME: [[SIG32]]) "DW_ATE_signed_32", DW_OP_stack_value)
 ; CONV:       DW_AT_name {{.*}}"y")
+; CONV:     DW_TAG_variable
 ; CONV:     NULL
 ; CONV:   DW_TAG_base_type
 ; CONV:     DW_AT_name {{.*}}"signed char")
 ; CONV:   DW_TAG_base_type
 ; CONV:     DW_AT_name {{.*}}"int")
+; CONV:   DW_TAG_base_type
+; CONV:     DW_AT_name {{.*}}"unsigned long long")
 ; CONV:   NULL
 
 ; NOCONV: DW_TAG_compile_unit
@@ -64,11 +69,17 @@
 ; NOCONV:       DW_AT_location (
 ; NOCONV:         {{.*}}, DW_OP_dup, DW_OP_constu 0x7, DW_OP_shr, DW_OP_lit0, DW_OP_not, DW_OP_mul, DW_OP_constu 0x8, DW_OP_shl, DW_OP_or, DW_OP_stack_value)
 ; NOCONV:       DW_AT_name ("y")
+; NOCONV:     DW_TAG_variable
+; NOCONV:       DW_AT_location (
+; NOCONV:         DW_OP_constu 0x40, DW_OP_lit0, DW_OP_plus, DW_OP_lit1, DW_OP_constu 0x40, DW_OP_shl, DW_OP_lit1, DW_OP_minus, DW_OP_and, DW_OP_stack_value)
+; NOCONV:       DW_AT_name ("z")
 ; NOCONV:     NULL
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("signed char")
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("int")
+; NOCONV:   DW_TAG_base_type
+; NOCONV:     DW_AT_name ("unsigned long long")
 ; NOCONV:   NULL
 
 
@@ -81,6 +92,7 @@ entry:
 ;; will not attempt to eliminate.  At the moment, only "convert" ops are folded.
 ;; If you have to change the expression, the expected DWO_id also changes.
   call void @llvm.dbg.value(metadata i8 32, metadata !13, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !15
+  call void @llvm.dbg.value(metadata i8 64, metadata !17, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 128, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !15
   ret i8 %x, !dbg !16
 }
 
@@ -111,3 +123,5 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !15 = !DILocation(line: 3, column: 14, scope: !7)
 !16 = !DILocation(line: 4, column: 3, scope: !7)
+!17 = !DILocalVariable(name: "z", scope: !7, file: !1, line: 3, type: !18)
+!18 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)