[llvm] 1a60905 - [AArch64][InstCombine] Eliminate redundant barrier intrinsics (#112023)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 17 10:04:07 PDT 2024


Author: Danila Malyutin
Date: 2024-10-17T21:04:04+04:00
New Revision: 1a609052b65e7b8ca78159d5ad14eafbeb039eb2

URL: https://github.com/llvm/llvm-project/commit/1a609052b65e7b8ca78159d5ad14eafbeb039eb2
DIFF: https://github.com/llvm/llvm-project/commit/1a609052b65e7b8ca78159d5ad14eafbeb039eb2.diff

LOG: [AArch64][InstCombine] Eliminate redundant barrier intrinsics (#112023)

If there are no memory ops on the path from one dmb to another, then one
of the barriers can be eliminated.

Added: 
    llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d33d0aa5855495..7c6b789b9c1b72 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -66,6 +66,10 @@ static cl::opt<unsigned>
     BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
                     cl::desc("The cost of a histcnt instruction"));
 
+static cl::opt<unsigned> DMBLookaheadThreshold(
+    "dmb-lookahead-threshold", cl::init(10), cl::Hidden,
+    cl::desc("The number of instructions to search for a redundant dmb"));
+
 namespace {
 class TailFoldingOption {
   // These bitfields will only ever be set to something non-zero in operator=,
@@ -2152,6 +2156,31 @@ static std::optional<Instruction *> instCombineSVEInsr(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
+                                                   IntrinsicInst &II) {
+  // If this barrier is post-dominated by identical one we can remove it
+  auto *NI = II.getNextNonDebugInstruction();
+  unsigned LookaheadThreshold = DMBLookaheadThreshold;
+  auto CanSkipOver = [](Instruction *I) {
+    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
+  };
+  while (LookaheadThreshold-- && CanSkipOver(NI)) {
+    auto *NIBB = NI->getParent();
+    NI = NI->getNextNonDebugInstruction();
+    if (!NI) {
+      if (auto *SuccBB = NIBB->getUniqueSuccessor())
+        NI = SuccBB->getFirstNonPHIOrDbgOrLifetime();
+      else
+        break;
+    }
+  }
+  auto *NextII = dyn_cast_or_null<IntrinsicInst>(NI);
+  if (NextII && II.isIdenticalTo(NextII))
+    return IC.eraseInstFromFunction(II);
+
+  return std::nullopt;
+}
+
 std::optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -2159,6 +2188,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   switch (IID) {
   default:
     break;
+  case Intrinsic::aarch64_dmb:
+    return instCombineDMB(IC, II);
   case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
   case Intrinsic::aarch64_sve_fcvt_f16f32:
   case Intrinsic::aarch64_sve_fcvt_f16f64:

diff --git a/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll
new file mode 100644
index 00000000000000..dacdd413013658
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; ARM64 dmb intrinsics
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @llvm.aarch64.dmb(i32)
+declare void @llvm.aarch64.dsb(i32)
+declare void @clobber()
+declare void @pure() memory(none) willreturn nounwind
+declare i32  @llvm.ctlz.i32(i32, i1)
+
+define void @simple() #0 {
+; CHECK-LABEL: define void @simple() {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+; dmb ish (0xb) is technically stronger than ishst (0xa) but we don't merge for now
+define void @simple_nonmatching() #0 {
+; CHECK-LABEL: define void @simple_nonmatching() {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 11)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @llvm.aarch64.dmb(i32 11)
+  ret void
+}
+
+define ptr @simple_safe_instruction(ptr %p) #0 {
+; CHECK-LABEL: define ptr @simple_safe_instruction(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret ptr [[RES]]
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  %res = getelementptr inbounds i8, ptr %p, i32 8
+  call void @llvm.aarch64.dmb(i32 10)
+  ret ptr %res
+}
+
+define i32 @simple_safe_intrinsic(i32 %n) #0 {
+; CHECK-LABEL: define i32 @simple_safe_intrinsic(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:    [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[N]], i1 false)
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  %res = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  call void @llvm.aarch64.dmb(i32 10)
+  ret i32 %res
+}
+
+define void @simple_unsafe_intrinsic() #0 {
+; CHECK-LABEL: define void @simple_unsafe_intrinsic() {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    call void @llvm.aarch64.dsb(i32 10)
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @llvm.aarch64.dsb(i32 10)
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @simple_safe_unsafe_instruction(ptr %p) #0 {
+; CHECK-LABEL: define void @simple_safe_unsafe_instruction(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  store i32 42, ptr %p
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @simple_safe_unsafe_call(ptr %p) #0 {
+; CHECK-LABEL: define void @simple_safe_unsafe_call(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @clobber()
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @simple_safe_safe_call(ptr %p) #0 {
+; CHECK-LABEL: define void @simple_safe_safe_call(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.dmb(i32 10)
+  call void @pure()
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @multiple_bbs1(i1 %f) #0 {
+; CHECK-LABEL: define void @multiple_bbs1(
+; CHECK-SAME: i1 [[F:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
+; CHECK:       [[BB_T]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BB_F]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %f, label %bb_t, label %bb_f
+bb_t:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+bb_f:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+exit:
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @multiple_bbs2(i1 %f) #0 {
+; CHECK-LABEL: define void @multiple_bbs2(
+; CHECK-SAME: i1 [[F:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
+; CHECK:       [[BB_T]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BB_F]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %f, label %bb_t, label %bb_f
+bb_t:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+bb_f:
+  br label %exit
+exit:
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @multiple_bbs3(i1 %f, ptr %p) #0 {
+; CHECK-LABEL: define void @multiple_bbs3(
+; CHECK-SAME: i1 [[F:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
+; CHECK:       [[BB_T]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BB_F]]:
+; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %f, label %bb_t, label %bb_f
+bb_t:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+bb_f:
+  store i32 42, ptr %p
+  br label %exit
+exit:
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+
+define void @multiple_bbs_unsafe(i1 %f, ptr %p) #0 {
+; CHECK-LABEL: define void @multiple_bbs_unsafe(
+; CHECK-SAME: i1 [[F:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]]
+; CHECK:       [[BB_T]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    store i32 42, ptr [[P]], align 4
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BB_F]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %f, label %bb_t, label %bb_f
+bb_t:
+  call void @llvm.aarch64.dmb(i32 10)
+  store i32 42, ptr %p
+  br label %exit
+bb_f:
+  call void @llvm.aarch64.dmb(i32 10)
+  br label %exit
+exit:
+  call void @llvm.aarch64.dmb(i32 10)
+  ret void
+}
+


        


More information about the llvm-commits mailing list