[llvm] [AggressiveInstCombine] Ignore debug instructions when load combining (PR #70200)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 25 05:06:45 PDT 2023


https://github.com/mikaelholmen created https://github.com/llvm/llvm-project/pull/70200

We previously included debug instructions in the instruction count when
looking for loads to combine. This meant that the presence of debug
instructions could affect optimization, as shown in the updated testcase.

This fixes #69925.


>From 74e1a5008eb5aae9454410015eb6ed10f3d17c63 Mon Sep 17 00:00:00 2001
From: Mikael Holmen <mikael.holmen at ericsson.com>
Date: Wed, 25 Oct 2023 07:09:54 +0200
Subject: [PATCH 1/2] [test][AggressiveInstCombine] Precommit testcase for
 #69925

We get different results with/without debug info present.
---
 .../AArch64/combine_ignore_debug.ll           | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll

diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll
new file mode 100644
index 000000000000000..4b41060544f7a0d
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -mtriple aarch64 -aggressive-instcombine-max-scan-instrs=1 -passes="aggressive-instcombine" -S < %s | FileCheck %s -check-prefix DBG
+; RUN: opt -strip-debug -mtriple aarch64 -aggressive-instcombine-max-scan-instrs=1 -passes="aggressive-instcombine" -S < %s | FileCheck %s -check-prefix NODBG
+
+; FIXME: The DBG and NODBG cases should be the same. I.e. we should optimize the
+; DBG case too even if there is a dbg.value.
+; This is described in https://github.com/llvm/llvm-project/issues/69925
+
+target datalayout = "E"
+
+%s = type { i16, i16 }
+
+ at e = global %s zeroinitializer, align 1
+ at l = global %s zeroinitializer, align 1
+
+define void @test() {
+; DBG-LABEL: define void @test() {
+; DBG-NEXT:  entry:
+; DBG-NEXT:    [[L1:%.*]] = load i16, ptr @e, align 1
+; DBG-NEXT:    call void @llvm.dbg.value(metadata i32 undef, metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG5:![0-9]+]]
+; DBG-NEXT:    [[L2:%.*]] = load i16, ptr getelementptr inbounds ([[S:%.*]], ptr @e, i16 0, i32 1), align 1
+; DBG-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i32
+; DBG-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i32
+; DBG-NEXT:    [[S1:%.*]] = shl nuw i32 [[E1]], 16
+; DBG-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[E2]]
+; DBG-NEXT:    store i32 [[O1]], ptr @l, align 1
+; DBG-NEXT:    ret void
+;
+; NODBG-LABEL: define void @test() {
+; NODBG-NEXT:  entry:
+; NODBG-NEXT:    [[L1:%.*]] = load i32, ptr @e, align 1
+; NODBG-NEXT:    store i32 [[L1]], ptr @l, align 1
+; NODBG-NEXT:    ret void
+;
+entry:
+  %l1 = load i16, ptr @e, align 1
+  call void @llvm.dbg.value(metadata i32 undef, metadata !3, metadata !DIExpression()), !dbg !5
+  %l2 = load i16, ptr getelementptr inbounds (%s, ptr @e, i16 0, i32 1), align 1
+  %e2 = zext i16 %l2 to i32
+  %e1 = zext i16 %l1 to i32
+  %s1 = shl nuw i32 %e1, 16
+  %o1 = or i32 %s1, %e2
+  store i32 %o1, ptr @l, align 1
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1)
+!1 = !DIFile(filename: "foo.c", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DILocalVariable(scope: !4)
+!4 = distinct !DISubprogram(unit: !0)
+!5 = !DILocation(scope: !4)

>From 201d0c67ac0e52cb64b46db04f04b17596446bc5 Mon Sep 17 00:00:00 2001
From: Mikael Holmen <mikael.holmen at ericsson.com>
Date: Wed, 25 Oct 2023 07:11:15 +0200
Subject: [PATCH 2/2] [AggressiveInstCombine] Ignore debug instructions when
 load combining

We previously included debug instructions in the instruction count when
looking for loads to combine. This meant that the presence of debug
instructions could affect optimization, as shown in the updated testcase.

This fixes #69925.
---
 .../AggressiveInstCombine.cpp                      | 11 ++++++++++-
 .../AArch64/combine_ignore_debug.ll                | 14 ++++----------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index a55d01645f10eb8..72f55e237ca9151 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -701,12 +701,21 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
       Loc = Loc.getWithNewSize(LOps.LoadSize);
   } else
     Loc = MemoryLocation::get(End);
+
+  // Ignore debug info (and other "AssumeLike" intrinsics) so that it is not
+  // counted against MaxInstrsToScan. Otherwise debug info could affect codegen.
+  auto IsAssumeLikeIntr = [](const Instruction &I) {
+    if (auto *II = dyn_cast<IntrinsicInst>(&I))
+      return II->isAssumeLikeIntrinsic();
+    return false;
+  };
   unsigned NumScanned = 0;
   for (Instruction &Inst :
        make_range(Start->getIterator(), End->getIterator())) {
     if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
       return false;
-    if (++NumScanned > MaxInstrsToScan)
+
+    if (!IsAssumeLikeIntr(Inst) && ++NumScanned > MaxInstrsToScan)
       return false;
   }
 
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll
index 4b41060544f7a0d..68455a1f9074ecb 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll
@@ -2,9 +2,8 @@
 ; RUN: opt -mtriple aarch64 -aggressive-instcombine-max-scan-instrs=1 -passes="aggressive-instcombine" -S < %s | FileCheck %s -check-prefix DBG
 ; RUN: opt -strip-debug -mtriple aarch64 -aggressive-instcombine-max-scan-instrs=1 -passes="aggressive-instcombine" -S < %s | FileCheck %s -check-prefix NODBG
 
-; FIXME: The DBG and NODBG cases should be the same. I.e. we should optimize the
-; DBG case too even if there is a dbg.value.
-; This is described in https://github.com/llvm/llvm-project/issues/69925
+; The DBG and NODBG cases should be the same. I.e. we should optimize the DBG
+; case too even if there is a dbg.value.
 
 target datalayout = "E"
 
@@ -16,14 +15,9 @@ target datalayout = "E"
 define void @test() {
 ; DBG-LABEL: define void @test() {
 ; DBG-NEXT:  entry:
-; DBG-NEXT:    [[L1:%.*]] = load i16, ptr @e, align 1
+; DBG-NEXT:    [[L1:%.*]] = load i32, ptr @e, align 1
 ; DBG-NEXT:    call void @llvm.dbg.value(metadata i32 undef, metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG5:![0-9]+]]
-; DBG-NEXT:    [[L2:%.*]] = load i16, ptr getelementptr inbounds ([[S:%.*]], ptr @e, i16 0, i32 1), align 1
-; DBG-NEXT:    [[E2:%.*]] = zext i16 [[L2]] to i32
-; DBG-NEXT:    [[E1:%.*]] = zext i16 [[L1]] to i32
-; DBG-NEXT:    [[S1:%.*]] = shl nuw i32 [[E1]], 16
-; DBG-NEXT:    [[O1:%.*]] = or i32 [[S1]], [[E2]]
-; DBG-NEXT:    store i32 [[O1]], ptr @l, align 1
+; DBG-NEXT:    store i32 [[L1]], ptr @l, align 1
 ; DBG-NEXT:    ret void
 ;
 ; NODBG-LABEL: define void @test() {



More information about the llvm-commits mailing list