[llvm] [AArch64][InstCombine] Eliminate redundant barrier intrinsics (PR #112023)

Danila Malyutin via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 14 12:04:56 PDT 2024


https://github.com/danilaml updated https://github.com/llvm/llvm-project/pull/112023

>From bfabda722548acca30e31917d4e513c84f66a152 Mon Sep 17 00:00:00 2001
From: Danila Malyutin <dmalyutin at azul.com>
Date: Fri, 11 Oct 2024 20:47:42 +0400
Subject: [PATCH] [AArch64][InstCombine] Eliminate redundant barrier intrinsics

If there are no memory ops on the path from one dmb to another, then one
barrier can be eliminated.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  31 +++
 .../InstCombine/AArch64/dmb-intrinsics.ll     | 220 ++++++++++++++++++
 2 files changed, 251 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 91ab3fcfc4c70e..4af7a2e14ed61f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -66,6 +66,10 @@ static cl::opt<unsigned>
     BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
                     cl::desc("The cost of a histcnt instruction"));
 
+static cl::opt<unsigned> DMBLookaheadThreshold(
+    "dmb-lookahead-threshold", cl::init(10), cl::Hidden,
+    cl::desc("The number of instructions to search for a redundant dmb"));
+
 namespace {
 class TailFoldingOption {
   // These bitfields will only ever be set to something non-zero in operator=,
@@ -2150,6 +2154,31 @@ static std::optional<Instruction *> instCombineSVEInsr(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
+                                                   IntrinsicInst &II) {
+  // If this barrier is post-dominated by an identical one, we can remove it
+  auto *NI = II.getNextNonDebugInstruction();
+  unsigned LookaheadThreshold = DMBLookaheadThreshold;
+  auto CanSkipOver = [](Instruction *I) {
+    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
+  };
+  while (LookaheadThreshold-- && CanSkipOver(NI)) {
+    auto *NIBB = NI->getParent();
+    NI = NI->getNextNonDebugInstruction();
+    if (!NI) {
+      if (auto *SuccBB = NIBB->getUniqueSuccessor())
+        NI = SuccBB->getFirstNonPHIOrDbgOrLifetime();
+      else
+        break;
+    }
+  }
+  auto *NextII = dyn_cast_or_null<IntrinsicInst>(NI);
+  if (NextII && II.isIdenticalTo(NextII))
+    return IC.eraseInstFromFunction(II);
+
+  return std::nullopt;
+}
+
 std::optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -2157,6 +2186,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   switch (IID) {
   default:
     break;
+  case Intrinsic::aarch64_dmb:
+    return instCombineDMB(IC, II);
   case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
   case Intrinsic::aarch64_sve_fcvt_f16f32:
   case Intrinsic::aarch64_sve_fcvt_f16f64:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll
new file mode 100644
index 00000000000000..dacdd413013658
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; ARM64 dmb intrinsics
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @llvm.aarch64.dmb(i32)
+declare void @llvm.aarch64.dsb(i32)
+declare void @clobber()
+declare void @pure() memory(none) willreturn nounwind
+declare i32  @llvm.ctlz.i32(i32, i1)
+
+define void @simple() #0 {
+; CHECK-LABEL: define void @simple() {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+; dmb ish (0xb) is technically stronger than ishst (0xa), but we don't merge them for now
+define void @simple_nonmatching() #0 {
+; CHECK-LABEL: define void @simple_nonmatching() {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 11)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @llvm.aarch64.dmb(i32 11)
+  ret void
+}
+
+define ptr @simple_safe_instruction(ptr %p) #0 {
+; CHECK-LABEL: define ptr @simple_safe_instruction(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret ptr [[RES]]
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  %res = getelementptr inbounds i8, ptr %p, i32 8
+  call void @llvm.aarch64.dmb(i32 10)
+  ret ptr %res
+}
+
+define i32 @simple_safe_intrinsic(i32 %n) #0 {
+; CHECK-LABEL: define i32 @simple_safe_intrinsic(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[N]], i1 false)
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  %res = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  call void @llvm.aarch64.dmb(i32 10)
+  ret i32 %res
+}
+
+define void @simple_unsafe_intrinsic() #0 {
+; CHECK-LABEL: define void @simple_unsafe_intrinsic() {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    call void @llvm.aarch64.dsb(i32 10)
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @llvm.aarch64.dsb(i32 10)
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @simple_safe_unsafe_instruction(ptr %p) #0 {
+; CHECK-LABEL: define void @simple_safe_unsafe_instruction(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  store i32 42, ptr %p
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @simple_safe_unsafe_call(ptr %p) #0 {
+; CHECK-LABEL: define void @simple_safe_unsafe_call(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @clobber()
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @simple_safe_safe_call(ptr %p) #0 {
+; CHECK-LABEL: define void @simple_safe_safe_call(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @pure()
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @multiple_bbs1(i1 %f) #0 {
+; CHECK-LABEL: define void @multiple_bbs1(
+; CHECK-SAME: i1 [[F:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
+; CHECK:       [[BB_T]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BB_F]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %f, label %bb_t, label %bb_f
+bb_t:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+bb_f:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+exit:
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @multiple_bbs2(i1 %f) #0 {
+; CHECK-LABEL: define void @multiple_bbs2(
+; CHECK-SAME: i1 [[F:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
+; CHECK:       [[BB_T]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BB_F]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %f, label %bb_t, label %bb_f
+bb_t:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+bb_f:
+  br label %exit
+exit:
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @multiple_bbs3(i1 %f, ptr %p) #0 {
+; CHECK-LABEL: define void @multiple_bbs3(
+; CHECK-SAME: i1 [[F:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
+; CHECK:       [[BB_T]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BB_F]]:
+; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %f, label %bb_t, label %bb_f
+bb_t:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+bb_f:
+  store i32 42, ptr %p
+  br label %exit
+exit:
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @multiple_bbs_unsafe(i1 %f, ptr %p) #0 {
+; CHECK-LABEL: define void @multiple_bbs_unsafe(
+; CHECK-SAME: i1 [[F:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
+; CHECK:       [[BB_T]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BB_F]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %f, label %bb_t, label %bb_f
+bb_t:
+  call void @llvm.aarch64.dmb(i32 10)
+  store i32 42, ptr %p
+  br label %exit
+bb_f:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+exit:
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+



More information about the llvm-commits mailing list