[llvm] [clang] [RFC] Introducing `__builtin_consistent` to generate AArch64 BC.cond … (PR #72175)

Pavel Iliin via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 13 16:00:45 PST 2023


https://github.com/ilinpv created https://github.com/llvm/llvm-project/pull/72175

…instructions

The patch adds a new `__builtin_consistent` builtin which, when used with control flow conditions, provides a hint to the compiler that a branch or switch is very unlikely to change direction.
The hint is used to generate efficient conditional branch instructions on targets which support them. Currently it is `BC.cond` on AArch64 when FEAT_HBC is enabled.

>From 375575418f0ebbed871711fb8b47e984f59d339f Mon Sep 17 00:00:00 2001
From: Pavel Iliin <Pavel.Iliin at arm.com>
Date: Tue, 11 Jul 2023 10:07:02 +0100
Subject: [PATCH] [RFC] Introducing `__builtin_consistent` to generate AArch64
 BC.cond instructions

The patch adds a new `__builtin_consistent` builtin which, when used with
control flow conditions, provides a hint to the compiler that a branch or
switch is very unlikely to change direction.
The hint is used to generate efficient conditional branch instructions
on targets which support them. Currently it is `BC.cond` on AArch64 when
FEAT_HBC is enabled.
---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |   5 +-
 clang/docs/LanguageExtensions.rst             |  29 ++
 clang/include/clang/Basic/Builtins.def        |   5 +-
 clang/lib/Analysis/CalledOnceCheck.cpp        |   1 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  10 +-
 clang/lib/CodeGen/CGStmt.cpp                  |  17 +-
 clang/lib/CodeGen/CodeGenFunction.cpp         |  18 +-
 .../Checkers/BuiltinFunctionChecker.cpp       |   3 +-
 clang/test/CodeGen/builtin-consistent.c       |  37 ++
 llvm/include/llvm/CodeGen/MachineInstr.h      |   1 +
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h |   8 +-
 llvm/include/llvm/CodeGen/TargetInstrInfo.h   |   7 +-
 llvm/include/llvm/IR/FixedMetadataKinds.def   |   1 +
 llvm/include/llvm/IR/IRBuilder.h              |  26 +-
 llvm/include/llvm/IR/MDBuilder.h              |   3 +
 llvm/lib/CodeGen/BranchFolding.cpp            |  45 +-
 llvm/lib/CodeGen/MIRPrinter.cpp               |   2 +
 llvm/lib/CodeGen/MachineBasicBlock.cpp        |  25 +-
 llvm/lib/CodeGen/MachineInstr.cpp             |   5 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  34 +-
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp |   3 +
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |   2 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  57 ++-
 .../SelectionDAG/SelectionDAGBuilder.h        |  12 +-
 .../SelectionDAG/SelectionDAGDumper.cpp       |   3 +
 llvm/lib/IR/IRBuilder.cpp                     |   3 +-
 llvm/lib/IR/MDBuilder.cpp                     |   5 +
 .../Target/AArch64/AArch64CondBrTuning.cpp    |   6 +-
 .../AArch64/AArch64ConditionOptimizer.cpp     |   2 +-
 llvm/lib/Target/AArch64/AArch64FastISel.cpp   |  41 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  50 ++-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   1 +
 .../lib/Target/AArch64/AArch64InstrFormats.td |   4 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |  45 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.h    |  14 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   6 +-
 .../lib/Target/AArch64/AArch64MacroFusion.cpp |   3 +-
 .../AArch64RedundantCopyElimination.cpp       |   2 +-
 .../GISel/AArch64InstructionSelector.cpp      |  27 +-
 llvm/lib/Target/AMDGPU/R600InstrInfo.cpp      |   8 +-
 llvm/lib/Target/AMDGPU/R600InstrInfo.h        |   8 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |   8 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |   8 +-
 llvm/lib/Target/ARC/ARCInstrInfo.cpp          |   7 +-
 llvm/lib/Target/ARC/ARCInstrInfo.h            |   8 +-
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp      |   7 +-
 llvm/lib/Target/ARM/ARMBaseInstrInfo.h        |   8 +-
 llvm/lib/Target/AVR/AVRInstrInfo.cpp          |   7 +-
 llvm/lib/Target/AVR/AVRInstrInfo.h            |   8 +-
 llvm/lib/Target/BPF/BPFInstrInfo.cpp          |   8 +-
 llvm/lib/Target/BPF/BPFInstrInfo.h            |   9 +-
 llvm/lib/Target/CSKY/CSKYInstrInfo.cpp        |  13 +-
 llvm/lib/Target/CSKY/CSKYInstrInfo.h          |   8 +-
 llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp  |   9 +-
 llvm/lib/Target/Hexagon/HexagonInstrInfo.h    |   8 +-
 llvm/lib/Target/Lanai/LanaiInstrInfo.cpp      |   8 +-
 llvm/lib/Target/Lanai/LanaiInstrInfo.h        |  10 +-
 .../Target/LoongArch/LoongArchInstrInfo.cpp   |  12 +-
 .../lib/Target/LoongArch/LoongArchInstrInfo.h |   8 +-
 llvm/lib/Target/M68k/M68kInstrInfo.cpp        |  13 +-
 llvm/lib/Target/M68k/M68kInstrInfo.h          |   8 +-
 llvm/lib/Target/MSP430/MSP430InstrInfo.cpp    |   7 +-
 llvm/lib/Target/MSP430/MSP430InstrInfo.h      |   8 +-
 llvm/lib/Target/Mips/MipsInstrInfo.cpp        |   8 +-
 llvm/lib/Target/Mips/MipsInstrInfo.h          |   8 +-
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp      |   8 +-
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.h        |   8 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      |   8 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.h        |   8 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      |  13 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.h        |   8 +-
 llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp      |  13 +-
 llvm/lib/Target/SPIRV/SPIRVInstrInfo.h        |   8 +-
 llvm/lib/Target/Sparc/SparcInstrInfo.cpp      |   8 +-
 llvm/lib/Target/Sparc/SparcInstrInfo.h        |   8 +-
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  |   7 +-
 llvm/lib/Target/SystemZ/SystemZInstrInfo.h    |   8 +-
 llvm/lib/Target/VE/VEInstrInfo.cpp            |   7 +-
 llvm/lib/Target/VE/VEInstrInfo.h              |   8 +-
 .../WebAssembly/WebAssemblyInstrInfo.cpp      |  12 +-
 .../Target/WebAssembly/WebAssemblyInstrInfo.h |   8 +-
 llvm/lib/Target/X86/X86InstrInfo.cpp          |   8 +-
 llvm/lib/Target/X86/X86InstrInfo.h            |   8 +-
 llvm/lib/Target/XCore/XCoreInstrInfo.cpp      |   8 +-
 llvm/lib/Target/XCore/XCoreInstrInfo.h        |   8 +-
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |  10 +-
 llvm/test/CodeGen/AArch64/cond-br-tuning.ll   | 117 +++++-
 llvm/test/CodeGen/AArch64/tbl-loops.ll        | 395 +++++++++++++++++-
 88 files changed, 1100 insertions(+), 384 deletions(-)
 create mode 100644 clang/test/CodeGen/builtin-consistent.c

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index b852b9fbc9c52f4..725e0e2adb821cf 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -962,7 +962,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     if (isTB(Inst) || isCB(Inst)) {
       Inst.setOpcode(getInvertedBranchOpcode(Inst.getOpcode()));
       assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
-    } else if (Inst.getOpcode() == AArch64::Bcc) {
+    } else if (Inst.getOpcode() == AArch64::Bcc ||
+               Inst.getOpcode() == AArch64::BCcc) {
       Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode(
           static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm())));
       assert(Inst.getOperand(0).getImm() != AArch64CC::AL &&
@@ -991,6 +992,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::B:        return 28;
     case AArch64::BL:       return 28;
     case AArch64::Bcc:      return 21;
+    case AArch64::BCcc:
+      return 21;
     }
   }
 
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 294210c6ac140a9..8f68f144691cc55 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -3102,6 +3102,35 @@ flow conditions such as in ``if`` and ``switch`` statements.
 
 Query for this feature with ``__has_builtin(__builtin_unpredictable)``.
 
+``__builtin_consistent``
+------------------------
+
+``__builtin_consistent`` is used to indicate that the value of an expression is
+very likely to be consistent, e.g. a branch using the expression as a condition
+will behave very consistently and is very unlikely to change direction.
+
+**Syntax**:
+
+.. code-block:: c++
+
+    __builtin_consistent(long long)
+
+**Example of use**:
+
+.. code-block:: c++
+
+  if (__builtin_consistent(x > 0)) {
+    foo();
+  }
+
+**Description**:
+
+The ``__builtin_consistent()`` builtin used with control flow conditions will
+provide information about branch consistency which can be used to generate
+more efficient conditional branch instructions if the target supports them
+(like AArch64 FEAT_HBC ``BC.cond``).
+
+Query for this feature with ``__has_builtin(__builtin_consistent)``.
 
 ``__builtin_expect``
 --------------------
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index ec39e926889b936..948964c90dc76ae 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -667,8 +667,9 @@ BUILTIN(__builtin___printf_chk, "iicC*R.", "Fp:1:")
 BUILTIN(__builtin___vfprintf_chk, "iP*RicC*Ra", "FP:2:")
 BUILTIN(__builtin___vprintf_chk, "iicC*Ra", "FP:1:")
 
-BUILTIN(__builtin_unpredictable, "LiLi"   , "nc")
-BUILTIN(__builtin_expect, "LiLiLi"   , "ncE")
+BUILTIN(__builtin_unpredictable, "LiLi", "nc")
+BUILTIN(__builtin_consistent, "LiLi", "nc")
+BUILTIN(__builtin_expect, "LiLiLi", "ncE")
 BUILTIN(__builtin_expect_with_probability, "LiLiLid", "ncE")
 BUILTIN(__builtin_prefetch, "vvC*.", "nc")
 BUILTIN(__builtin_readcyclecounter, "ULLi", "n")
diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp b/clang/lib/Analysis/CalledOnceCheck.cpp
index 5b4fc24b6f0e2a5..d1725f6151dbabc 100644
--- a/clang/lib/Analysis/CalledOnceCheck.cpp
+++ b/clang/lib/Analysis/CalledOnceCheck.cpp
@@ -365,6 +365,7 @@ class DeclRefFinder
     }
 
     case Builtin::BI__builtin_unpredictable:
+    case Builtin::BI__builtin_consistent:
       return Visit(CE->getArg(0));
 
     default:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..63ccd42f9e31329 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3137,10 +3137,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                      "cast");
     return RValue::get(Result);
   }
-  case Builtin::BI__builtin_unpredictable: {
-    // Always return the argument of __builtin_unpredictable. LLVM does not
-    // handle this builtin. Metadata for this builtin should be added directly
-    // to instructions such as branches or switches that use it.
+  case Builtin::BI__builtin_unpredictable:
+  case Builtin::BI__builtin_consistent: {
+    // Always return the argument of __builtin_unpredictable and
+    // __builtin_consistent. LLVM does not handle these builtins. Metadata for
+    // these builtins should be added directly to instructions such as branches
+    // or switches that use it.
     return RValue::get(EmitScalarExpr(E->getArg(0)));
   }
   case Builtin::BI__builtin_expect: {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index c719df1bfa05036..2aa4b8253c7aff1 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2059,16 +2059,21 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
   EmitBlock(SwitchExit.getBlock(), true);
   incrementProfileCounter(&S);
 
-  // If the switch has a condition wrapped by __builtin_unpredictable,
-  // create metadata that specifies that the switch is unpredictable.
-  // Don't bother if not optimizing because that metadata would not be used.
+  // If the switch has a condition wrapped by __builtin_unpredictable or
+  // __builtin_consistent, create metadata that specifies that the switch is
+  // unpredictable or consistent correspondingly. Don't bother if not optimizing
+  // because that metadata would not be used.
   auto *Call = dyn_cast<CallExpr>(S.getCond());
   if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
     auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
-    if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
+    if (FD) {
       llvm::MDBuilder MDHelper(getLLVMContext());
-      SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable,
-                              MDHelper.createUnpredictable());
+      if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable)
+        SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable,
+                                MDHelper.createUnpredictable());
+      if (FD->getBuiltinID() == Builtin::BI__builtin_consistent)
+        SwitchInsn->setMetadata(llvm::LLVMContext::MD_consistent,
+                                MDHelper.createConsistent());
     }
   }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 0f2b9055b88eb04..a372913f8732c76 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1908,16 +1908,21 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
 
   llvm::MDNode *Weights = nullptr;
   llvm::MDNode *Unpredictable = nullptr;
+  llvm::MDNode *Consistent = nullptr;
 
-  // If the branch has a condition wrapped by __builtin_unpredictable,
-  // create metadata that specifies that the branch is unpredictable.
-  // Don't bother if not optimizing because that metadata would not be used.
+  // If the branch has a condition wrapped by __builtin_unpredictable or
+  // __builtin_consistent, create metadata that specifies that the branch is
+  // unpredictable or consistent correspondingly. Don't bother if not optimizing
+  // because that metadata would not be used.
   auto *Call = dyn_cast<CallExpr>(Cond->IgnoreImpCasts());
   if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
     auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
-    if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
+    if (FD) {
       llvm::MDBuilder MDHelper(getLLVMContext());
-      Unpredictable = MDHelper.createUnpredictable();
+      if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable)
+        Unpredictable = MDHelper.createUnpredictable();
+      if (FD->getBuiltinID() == Builtin::BI__builtin_consistent)
+        Consistent = MDHelper.createConsistent();
     }
   }
 
@@ -1932,7 +1937,8 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
     Weights = createProfileWeights(TrueCount, CurrentCount - TrueCount);
   }
 
-  Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights, Unpredictable);
+  Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights, Unpredictable,
+                       Consistent);
 }
 
 /// ErrorUnsupported - Print out an error that codegen doesn't support the
diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
index 4a56156de4b27fe..3de5c6676a732fd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -63,12 +63,13 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call,
   }
 
   case Builtin::BI__builtin_unpredictable:
+  case Builtin::BI__builtin_consistent:
   case Builtin::BI__builtin_expect:
   case Builtin::BI__builtin_expect_with_probability:
   case Builtin::BI__builtin_assume_aligned:
   case Builtin::BI__builtin_addressof:
   case Builtin::BI__builtin_function_start: {
-    // For __builtin_unpredictable, __builtin_expect,
+    // For __builtin_unpredictable, __builtin_consistent, __builtin_expect,
     // __builtin_expect_with_probability and __builtin_assume_aligned,
     // just return the value of the subexpression.
     // __builtin_addressof is going from a reference to a pointer, but those
diff --git a/clang/test/CodeGen/builtin-consistent.c b/clang/test/CodeGen/builtin-consistent.c
new file mode 100644
index 000000000000000..3f1be494acf1412
--- /dev/null
+++ b/clang/test/CodeGen/builtin-consistent.c
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm -disable-llvm-passes -o - %s -O1 | FileCheck %s 
+// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm -o - %s -O0 | FileCheck %s --check-prefix=CHECK_O0
+
+void f(void);
+void g(void);
+void consistent_branch(int x) {
+// CHECK-LABEL: define{{.*}} void @consistent_branch(
+// CHECK-NOT: builtin_consistent
+// CHECK: !consistent [[METADATA:.+]]
+// CHECK_O0-NOT: builtin_consistent
+// CHECK_O0-NOT: !consistent 
+  if (__builtin_consistent(x > 0))
+    f();
+  
+  if (x || __builtin_consistent(x != 0))
+    g();
+}
+
+int consistent_switch(int x) {
+// CHECK-LABEL: @consistent_switch(
+// CHECK-NOT: builtin_consistent
+// CHECK: !consistent [[METADATA:.+]]
+// CHECK_O0-NOT: builtin_consistent
+// CHECK_O0-NOT: !consistent 
+  switch(__builtin_consistent(x)) {
+  default:
+    return x;
+  case 0:
+  case 1:
+  case 2:
+    return 1;
+  case 3:
+    return x-1;
+  };
+}
+// CHECK: [[METADATA]] = !{i1 true}
+
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index bd72ac23fc9c08e..1fa27026d80f81c 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -114,6 +114,7 @@ class MachineInstr
                              // this instruction.
     Unpredictable = 1 << 16, // Instruction with unpredictable condition.
     NoConvergent = 1 << 17,  // Call does not require convergence guarantees.
+    Consistent = 1 << 18,    // Instruction condition behaves consistently.
   };
 
 private:
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 4df56aac4aa17ba..58680867953f847 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -398,6 +398,8 @@ struct SDNodeFlags {
   bool NoFPExcept : 1;
   // Instructions with attached 'unpredictable' metadata on IR level.
   bool Unpredictable : 1;
+  // Instructions with attached 'consistent' metadata on IR level.
+  bool Consistent : 1;
 
 public:
   /// Default constructor turns off all optimization flags.
@@ -405,7 +407,8 @@ struct SDNodeFlags {
       : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NonNeg(false),
         NoNaNs(false), NoInfs(false), NoSignedZeros(false),
         AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false),
-        AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {}
+        AllowReassociation(false), NoFPExcept(false), Unpredictable(false),
+        Consistent(false) {}
 
   /// Propagate the fast-math-flags from an IR FPMathOperator.
   void copyFMF(const FPMathOperator &FPMO) {
@@ -432,6 +435,7 @@ struct SDNodeFlags {
   void setAllowReassociation(bool b) { AllowReassociation = b; }
   void setNoFPExcept(bool b) { NoFPExcept = b; }
   void setUnpredictable(bool b) { Unpredictable = b; }
+  void setConsistent(bool b) { Consistent = b; }
 
   // These are accessors for each flag.
   bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@@ -447,6 +451,7 @@ struct SDNodeFlags {
   bool hasAllowReassociation() const { return AllowReassociation; }
   bool hasNoFPExcept() const { return NoFPExcept; }
   bool hasUnpredictable() const { return Unpredictable; }
+  bool hasConsistent() const { return Consistent; }
 
   /// Clear any flags in this flag set that aren't also set in Flags. All
   /// flags will be cleared if Flags are undefined.
@@ -464,6 +469,7 @@ struct SDNodeFlags {
     AllowReassociation &= Flags.AllowReassociation;
     NoFPExcept &= Flags.NoFPExcept;
     Unpredictable &= Flags.Unpredictable;
+    Consistent &= Flags.Consistent;
   }
 };
 
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8e7499ac626a747..e893baa90c14dbb 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -698,7 +698,8 @@ class TargetInstrInfo : public MCInstrInfo {
   /// If \p BytesRemoved is non-null, report the change in code size from the
   /// removed instructions.
   virtual unsigned removeBranch(MachineBasicBlock &MBB,
-                                int *BytesRemoved = nullptr) const {
+                                int *BytesRemoved = nullptr,
+                                bool *IsConsistent = nullptr) const {
     llvm_unreachable("Target didn't implement TargetInstrInfo::removeBranch!");
   }
 
@@ -718,8 +719,8 @@ class TargetInstrInfo : public MCInstrInfo {
   virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
                                 ArrayRef<MachineOperand> Cond,
-                                const DebugLoc &DL,
-                                int *BytesAdded = nullptr) const {
+                                const DebugLoc &DL, int *BytesAdded = nullptr,
+                                bool IsConsistent = false) const {
     llvm_unreachable("Target didn't implement TargetInstrInfo::insertBranch!");
   }
 
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index b375d0f0912060f..6dd0627c028bd84 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -51,3 +51,4 @@ LLVM_FIXED_MD_KIND(MD_kcfi_type, "kcfi_type", 36)
 LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37)
 LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
 LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
+LLVM_FIXED_MD_KIND(MD_consistent, "consistent", 40)
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index e3c4e76f90a4cfc..0cd6a7340845733 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1064,15 +1064,18 @@ class IRBuilderBase {
   //===--------------------------------------------------------------------===//
 
 private:
-  /// Helper to add branch weight and unpredictable metadata onto an
-  /// instruction.
+  /// Helper to add branch weight, unpredictable and consistent metadata onto
+  /// an instruction.
   /// \returns The annotated instruction.
   template <typename InstTy>
-  InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpredictable) {
+  InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpred,
+                            MDNode *Consist) {
     if (Weights)
       I->setMetadata(LLVMContext::MD_prof, Weights);
-    if (Unpredictable)
-      I->setMetadata(LLVMContext::MD_unpredictable, Unpredictable);
+    if (Unpred)
+      I->setMetadata(LLVMContext::MD_unpredictable, Unpred);
+    if (Consist)
+      I->setMetadata(LLVMContext::MD_consistent, Consist);
     return I;
   }
 
@@ -1110,9 +1113,10 @@ class IRBuilderBase {
   /// instruction.
   BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False,
                            MDNode *BranchWeights = nullptr,
-                           MDNode *Unpredictable = nullptr) {
+                           MDNode *Unpredictable = nullptr,
+                           MDNode *Consistent = nullptr) {
     return Insert(addBranchMetadata(BranchInst::Create(True, False, Cond),
-                                    BranchWeights, Unpredictable));
+                                    BranchWeights, Unpredictable, Consistent));
   }
 
   /// Create a conditional 'br Cond, TrueDest, FalseDest'
@@ -1121,7 +1125,8 @@ class IRBuilderBase {
                            Instruction *MDSrc) {
     BranchInst *Br = BranchInst::Create(True, False, Cond);
     if (MDSrc) {
-      unsigned WL[4] = {LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
+      unsigned WL[5] = {LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
+                        LLVMContext::MD_consistent,
                         LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
       Br->copyMetadata(*MDSrc, WL);
     }
@@ -1133,9 +1138,10 @@ class IRBuilderBase {
   /// allocation).
   SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10,
                            MDNode *BranchWeights = nullptr,
-                           MDNode *Unpredictable = nullptr) {
+                           MDNode *Unpredictable = nullptr,
+                           MDNode *Consistent = nullptr) {
     return Insert(addBranchMetadata(SwitchInst::Create(V, Dest, NumCases),
-                                    BranchWeights, Unpredictable));
+                                    BranchWeights, Unpredictable, Consistent));
   }
 
   /// Create an indirect branch instruction with the specified address
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index 39165453de16b09..f3ec99568cfa4c6 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -67,6 +67,9 @@ class MDBuilder {
   /// Return metadata specifying that a branch or switch is unpredictable.
   MDNode *createUnpredictable();
 
+  /// Return metadata specifying that a branch or switch behaves consistently.
+  MDNode *createConsistent();
+
   /// Return metadata containing the entry \p Count for a function, a boolean
   /// \Synthetic indicating whether the counts were synthetized, and the
   /// GUIDs stored in \p Imports that need to be imported for sample PGO, to
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 0801296cab49f8f..e8fca44a816d94a 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -465,8 +465,10 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
     MachineBasicBlock *NextBB = &*I;
     if (TBB == NextBB && !Cond.empty() && !FBB) {
       if (!TII->reverseBranchCondition(Cond)) {
-        TII->removeBranch(*CurMBB);
-        TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
+        bool IsConsistent = false;
+        TII->removeBranch(*CurMBB, nullptr, &IsConsistent);
+        TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl, nullptr,
+                          IsConsistent);
         return;
       }
     }
@@ -1116,12 +1118,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
 
         // Remove the unconditional branch at the end, if any.
         if (TBB && (Cond.empty() || FBB)) {
+          bool IsConsistent = false;
           DebugLoc dl = PBB->findBranchDebugLoc();
-          TII->removeBranch(*PBB);
+          TII->removeBranch(*PBB, nullptr, &IsConsistent);
           if (!Cond.empty())
             // reinsert conditional branch only, for now
-            TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
-                              NewCond, dl);
+            TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, NewCond,
+                              dl, nullptr, IsConsistent);
         }
 
         MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB));
@@ -1443,9 +1446,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
     // If the prior block branches somewhere else on the condition and here if
     // the condition is false, remove the uncond second branch.
     if (PriorFBB == MBB) {
+      bool IsConsistent = false;
       DebugLoc dl = getBranchDebugLoc(PrevBB);
-      TII->removeBranch(PrevBB);
-      TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+      TII->removeBranch(PrevBB, nullptr, &IsConsistent);
+      TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl, nullptr,
+                        IsConsistent);
       MadeChange = true;
       ++NumBranchOpts;
       goto ReoptimizeBlock;
@@ -1457,9 +1462,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
     if (PriorTBB == MBB) {
       SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
       if (!TII->reverseBranchCondition(NewPriorCond)) {
+        bool IsConsistent = false;
         DebugLoc dl = getBranchDebugLoc(PrevBB);
-        TII->removeBranch(PrevBB);
-        TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
+        TII->removeBranch(PrevBB, nullptr, &IsConsistent);
+        TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl, nullptr,
+                          IsConsistent);
         MadeChange = true;
         ++NumBranchOpts;
         goto ReoptimizeBlock;
@@ -1495,9 +1502,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
           LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB
                             << "To make fallthrough to: " << *PriorTBB << "\n");
 
+          bool IsConsistent = false;
           DebugLoc dl = getBranchDebugLoc(PrevBB);
-          TII->removeBranch(PrevBB);
-          TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
+          TII->removeBranch(PrevBB, nullptr, &IsConsistent);
+          TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl, nullptr,
+                            IsConsistent);
 
           // Move this block to the end of the function.
           MBB->moveAfter(&MF.back());
@@ -1558,9 +1567,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
     if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
       SmallVector<MachineOperand, 4> NewCond(CurCond);
       if (!TII->reverseBranchCondition(NewCond)) {
+        bool IsConsistent = false;
         DebugLoc dl = getBranchDebugLoc(*MBB);
-        TII->removeBranch(*MBB);
-        TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+        TII->removeBranch(*MBB, nullptr, &IsConsistent);
+        TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl, nullptr,
+                          IsConsistent);
         MadeChange = true;
         ++NumBranchOpts;
         goto ReoptimizeBlock;
@@ -1606,9 +1617,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
               assert(!PriorFBB && "Machine CFG out of date!");
               PriorFBB = MBB;
             }
+            bool IsConsistent = false;
             DebugLoc pdl = getBranchDebugLoc(PrevBB);
-            TII->removeBranch(PrevBB);
-            TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
+            TII->removeBranch(PrevBB, nullptr, &IsConsistent);
+            TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl,
+                              nullptr, IsConsistent);
           }
 
           // Iterate through all the predecessors, revectoring each in-turn.
@@ -1654,7 +1667,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
       }
 
       // Add the branch back if the block is more than just an uncond branch.
-      TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
+      TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl, nullptr, 0);
     }
   }
 
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index fee237104022e16..75db4141e486ffc 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -805,6 +805,8 @@ void MIPrinter::print(const MachineInstr &MI) {
     OS << "unpredictable ";
   if (MI.getFlag(MachineInstr::NoConvergent))
     OS << "noconvergent ";
+  if (MI.getFlag(MachineInstr::Consistent))
+    OS << "consistent ";
 
   OS << TII->getName(MI.getOpcode());
   if (I < E)
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index d9e22685faf5f5e..03cc56313d54d10 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -719,23 +719,25 @@ void MachineBasicBlock::updateTerminator(
       // If the unconditional successor block is not the current layout
       // successor, insert a branch to jump to it.
       if (!isLayoutSuccessor(PreviousLayoutSuccessor))
-        TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
+        TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL,
+                          nullptr, 0);
     }
     return;
   }
 
   if (FBB) {
+    bool IsConsistent = false;
     // The block has a non-fallthrough conditional branch. If one of its
     // successors is its layout successor, rewrite it to a fallthrough
     // conditional branch.
     if (isLayoutSuccessor(TBB)) {
       if (TII->reverseBranchCondition(Cond))
         return;
-      TII->removeBranch(*this);
-      TII->insertBranch(*this, FBB, nullptr, Cond, DL);
+      TII->removeBranch(*this, nullptr, &IsConsistent);
+      TII->insertBranch(*this, FBB, nullptr, Cond, DL, nullptr, IsConsistent);
     } else if (isLayoutSuccessor(FBB)) {
-      TII->removeBranch(*this);
-      TII->insertBranch(*this, TBB, nullptr, Cond, DL);
+      TII->removeBranch(*this, nullptr, &IsConsistent);
+      TII->insertBranch(*this, TBB, nullptr, Cond, DL, nullptr, IsConsistent);
     }
     return;
   }
@@ -757,6 +759,7 @@ void MachineBasicBlock::updateTerminator(
     return;
   }
 
+  bool IsConsistent = false;
   // The block has a fallthrough conditional branch.
   if (isLayoutSuccessor(TBB)) {
     if (TII->reverseBranchCondition(Cond)) {
@@ -765,11 +768,13 @@ void MachineBasicBlock::updateTerminator(
       TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
       return;
     }
-    TII->removeBranch(*this);
-    TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
+    TII->removeBranch(*this, nullptr, &IsConsistent);
+    TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL,
+                      nullptr, IsConsistent);
   } else if (!isLayoutSuccessor(PreviousLayoutSuccessor)) {
-    TII->removeBranch(*this);
-    TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL);
+    TII->removeBranch(*this, nullptr, &IsConsistent);
+    TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL, nullptr,
+                      IsConsistent);
   }
 }
 
@@ -1218,7 +1223,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
     SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes);
     SmallVector<MachineOperand, 4> Cond;
     const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
-    TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL);
+    TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL, nullptr, 0);
   }
 
   // Fix PHI nodes in Succ so they refer to NMBB instead of this.
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 9e7b4df2576feee..520a31ff8e1be90 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -586,6 +586,9 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
   if (I.getMetadata(LLVMContext::MD_unpredictable))
     MIFlags |= MachineInstr::MIFlag::Unpredictable;
 
+  if (I.getMetadata(LLVMContext::MD_consistent))
+    MIFlags |= MachineInstr::MIFlag::Consistent;
+
   return MIFlags;
 }
 
@@ -1693,6 +1696,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
     OS << "nofpexcept ";
   if (getFlag(MachineInstr::NoMerge))
     OS << "nomerge ";
+  if (getFlag(MachineInstr::Consistent))
+    OS << "consistent ";
 
   // Print the opcode name.
   if (TII)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a867d88f76c0cf6..3dec1f636a6f1a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17702,6 +17702,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+  unsigned BrOpcode = N->getOpcode();
+  SDNodeFlags Flags;
+  Flags.setConsistent(N->getFlags().hasConsistent());
   SDValue Chain = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDValue N2 = N->getOperand(2);
@@ -17709,8 +17712,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
   // nondeterministic jumps).
   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
-    return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
-                       N1->getOperand(0), N2);
+    return DAG.getNode(BrOpcode, SDLoc(N), MVT::Other, Chain, N1->getOperand(0),
+                       N2, Flags);
   }
 
   // Variant of the previous fold where there is a SETCC in between:
@@ -17758,8 +17761,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
 
     if (Updated)
       return DAG.getNode(
-          ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
-          DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
+          BrOpcode, SDLoc(N), MVT::Other, Chain,
+          DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
+          Flags);
   }
 
   // If N is a constant we could fold this into a fallthrough or unconditional
@@ -17773,9 +17777,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
   if (N1.getOpcode() == ISD::SETCC &&
       TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                    N1.getOperand(0).getValueType())) {
-    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
-                       Chain, N1.getOperand(2),
-                       N1.getOperand(0), N1.getOperand(1), N2);
+    SDValue Ops[] = {Chain, N1.getOperand(2), N1.getOperand(0),
+                     N1.getOperand(1), N2};
+    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Ops, Flags);
   }
 
   if (N1.hasOneUse()) {
@@ -17783,8 +17787,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
     HandleSDNode ChainHandle(Chain);
     if (SDValue NewN1 = rebuildSetCC(N1))
-      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
-                         ChainHandle.getValue(), NewN1, N2);
+      return DAG.getNode(BrOpcode, SDLoc(N), MVT::Other, ChainHandle.getValue(),
+                         NewN1, N2, Flags);
   }
 
   return SDValue();
@@ -17906,11 +17910,13 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
   if (Simp.getNode()) AddToWorklist(Simp.getNode());
 
   // fold to a simpler setcc
-  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
-    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
-                       N->getOperand(0), Simp.getOperand(2),
-                       Simp.getOperand(0), Simp.getOperand(1),
-                       N->getOperand(4));
+  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) {
+    SDNodeFlags Flags;
+    Flags.setConsistent(N->getFlags().hasConsistent());
+    SDValue Ops[] = {N->getOperand(0), Simp.getOperand(2), Simp.getOperand(0),
+                     Simp.getOperand(1), N->getOperand(4)};
+    return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, Ops, Flags);
+  }
 
   return SDValue();
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a27febe15db832d..406fae0f22aafb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1083,6 +1083,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
       MI->setFlag(MachineInstr::MIFlag::Unpredictable);
   }
 
+  if (Node->getFlags().hasConsistent())
+    MIB.getInstr()->setFlag(MachineInstr::MIFlag::Consistent);
+
   // Emit all of the actual operands of this instruction, adding them to the
   // instruction as appropriate.
   bool HasOptPRefs = NumDefs > NumResults;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index ad5a4506efbd828..433204f4bc1881b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1061,7 +1061,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
                          : Opc == ISD::SETCCCARRY                      ? 3
                          : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2
                                                                        : 1;
-    unsigned CompareOperand = Opc == ISD::BR_CC            ? 2
+    unsigned CompareOperand = (Opc == ISD::BR_CC)          ? 2
                               : Opc == ISD::STRICT_FSETCC  ? 1
                               : Opc == ISD::STRICT_FSETCCS ? 1
                                                            : 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index aab0d5c5a348bfe..ad8ae961a57fd94 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2548,7 +2548,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
         }
 
         // Emit the branch for this block.
-        visitSwitchCase(SL->SwitchCases[0], BrMBB);
+        visitSwitchCase(SL->SwitchCases[0], BrMBB,
+                        I.hasMetadata(LLVMContext::MD_consistent));
         SL->SwitchCases.erase(SL->SwitchCases.begin());
         return;
       }
@@ -2568,13 +2569,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
 
   // Use visitSwitchCase to actually insert the fast branch sequence for this
   // cond branch.
-  visitSwitchCase(CB, BrMBB);
+  visitSwitchCase(CB, BrMBB, I.hasMetadata(LLVMContext::MD_consistent));
 }
 
 /// visitSwitchCase - Emits the necessary code to represent a single node in
 /// the binary search tree resulting from lowering a switch instruction.
 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
-                                          MachineBasicBlock *SwitchBB) {
+                                          MachineBasicBlock *SwitchBB,
+                                          bool IsConsistent) {
   SDValue Cond;
   SDValue CondLHS = getValue(CB.CmpLHS);
   SDLoc dl = CB.DL;
@@ -2652,9 +2654,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
     Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
   }
 
-  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
-                               MVT::Other, getControlRoot(), Cond,
-                               DAG.getBasicBlock(CB.TrueBB));
+  SDNodeFlags Flags;
+  Flags.setConsistent(IsConsistent);
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(),
+                               Cond, DAG.getBasicBlock(CB.TrueBB), Flags);
 
   setValue(CurInst, BrCond);
 
@@ -2887,7 +2890,8 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
 /// visitBitTestHeader - This function emits necessary code to produce value
 /// suitable for "bit tests"
 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
-                                             MachineBasicBlock *SwitchBB) {
+                                             MachineBasicBlock *SwitchBB,
+                                             bool IsConsistent) {
   SDLoc dl = getCurSDLoc();
 
   // Subtract the minimum value.
@@ -2935,9 +2939,10 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
                                RangeSub.getValueType()),
         RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
         ISD::SETUGT);
-
+    SDNodeFlags Flags;
+    Flags.setConsistent(IsConsistent);
     Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
-                       DAG.getBasicBlock(B.Default));
+                       DAG.getBasicBlock(B.Default), Flags);
   }
 
   // Avoid emitting unnecessary branches to the next block.
@@ -3404,6 +3409,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
   Flags.setUnpredictable(
       cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
 
+  Flags.setConsistent(
+      cast<SelectInst>(I).getMetadata(LLVMContext::MD_consistent));
+
   // Min/max matching is only viable if all output VTs are the same.
   if (all_equal(ValueVTs)) {
     EVT VT = ValueVTs[0];
@@ -11357,9 +11365,12 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
     HasTailCall = true;
 }
 
-void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W,
+                                        const SwitchInst &SI,
                                         MachineBasicBlock *SwitchMBB,
                                         MachineBasicBlock *DefaultMBB) {
+  const Value *Cond = SI.getCondition();
+  bool IsConsistent = SI.getMetadata(LLVMContext::MD_consistent);
   MachineFunction *CurMF = FuncInfo.MF;
   MachineBasicBlock *NextMBB = nullptr;
   MachineFunction::iterator BBI(W.MBB);
@@ -11412,9 +11423,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
           addSuccessorWithProb(SwitchMBB, DefaultMBB);
 
         // Insert the true branch.
+        SDNodeFlags Flags;
+        Flags.setConsistent(IsConsistent);
         SDValue BrCond =
             DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
-                        DAG.getBasicBlock(Small.MBB));
+                        DAG.getBasicBlock(Small.MBB), Flags);
         // Insert the false branch.
         BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
                              DAG.getBasicBlock(DefaultMBB));
@@ -11571,7 +11584,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
 
         // If we're in the right place, emit the bit test header right now.
         if (CurMBB == SwitchMBB) {
-          visitBitTestHeader(*BTB, SwitchMBB);
+          visitBitTestHeader(*BTB, SwitchMBB, IsConsistent);
           BTB->Emitted = true;
         }
         break;
@@ -11602,7 +11615,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
                      getCurSDLoc(), I->Prob, UnhandledProbs);
 
         if (CurMBB == SwitchMBB)
-          visitSwitchCase(CB, SwitchMBB);
+          visitSwitchCase(CB, SwitchMBB, IsConsistent);
         else
           SL->SwitchCases.push_back(CB);
 
@@ -11627,7 +11640,7 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
 
 void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
                                         const SwitchWorkListItem &W,
-                                        Value *Cond,
+                                        const SwitchInst &SI,
                                         MachineBasicBlock *SwitchMBB) {
   assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
          "Clusters not sorted?");
@@ -11729,7 +11742,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
     WorkList.push_back(
         {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
     // Put Cond in a virtual register to make it available from the new blocks.
-    ExportFromCurrentBlock(Cond);
+    ExportFromCurrentBlock(SI.getCondition());
   }
 
   // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
@@ -11745,15 +11758,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
     WorkList.push_back(
         {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
     // Put Cond in a virtual register to make it available from the new blocks.
-    ExportFromCurrentBlock(Cond);
+    ExportFromCurrentBlock(SI.getCondition());
   }
 
   // Create the CaseBlock record that will be used to lower the branch.
-  CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
-               getCurSDLoc(), LeftProb, RightProb);
+  CaseBlock CB(ISD::SETLT, SI.getCondition(), Pivot, nullptr, LeftMBB, RightMBB,
+               W.MBB, getCurSDLoc(), LeftProb, RightProb);
 
   if (W.MBB == SwitchMBB)
-    visitSwitchCase(CB, SwitchMBB);
+    visitSwitchCase(CB, SwitchMBB, SI.getMetadata(LLVMContext::MD_consistent));
   else
     SL->SwitchCases.push_back(CB);
 }
@@ -11815,7 +11828,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
   auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
   SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
                           nullptr,   nullptr,      TopCaseProb.getCompl()};
-  lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+  lowerWorkItem(W, SI, SwitchMBB, PeeledSwitchMBB);
 
   Clusters.erase(PeeledCaseIt);
   for (CaseCluster &CC : Clusters) {
@@ -11908,11 +11921,11 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None &&
         !DefaultMBB->getParent()->getFunction().hasMinSize()) {
       // For optimized builds, lower large range as a balanced binary tree.
-      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
+      splitWorkItem(WorkList, W, SI, SwitchMBB);
       continue;
     }
 
-    lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
+    lowerWorkItem(W, SI, SwitchMBB, DefaultMBB);
   }
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index a97884f0efb9a9b..a426c1c871fc9b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -222,11 +222,11 @@ class SelectionDAGBuilder {
 
   /// Emit comparison and split W into two subtrees.
   void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
-                     const SwitchCG::SwitchWorkListItem &W, Value *Cond,
-                     MachineBasicBlock *SwitchMBB);
+                     const SwitchCG::SwitchWorkListItem &W,
+                     const SwitchInst &SI, MachineBasicBlock *SwitchMBB);
 
   /// Lower W.
-  void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
+  void lowerWorkItem(SwitchCG::SwitchWorkListItem W, const SwitchInst &SI,
                      MachineBasicBlock *SwitchMBB,
                      MachineBasicBlock *DefaultMBB);
 
@@ -525,12 +525,14 @@ class SelectionDAGBuilder {
       BranchProbability Prob = BranchProbability::getUnknown());
 
 public:
-  void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB);
+  void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB,
+                       bool IsConsistent = false);
   void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
                                MachineBasicBlock *ParentBB);
   void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
   void visitBitTestHeader(SwitchCG::BitTestBlock &B,
-                          MachineBasicBlock *SwitchBB);
+                          MachineBasicBlock *SwitchBB,
+                          bool IsConsistent = false);
   void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
                         BranchProbability BranchProbToNext, unsigned Reg,
                         SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 78cc60084068a5f..ad4e4850bdc7507 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -624,6 +624,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
   if (getFlags().hasNoFPExcept())
     OS << " nofpexcept";
 
+  if (getFlags().hasConsistent())
+    OS << " consistent";
+
   if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
     if (!MN->memoperands_empty()) {
       OS << "<";
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b321d8b325fe0be..9ad1d24f79a69bd 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -1125,7 +1125,8 @@ Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False,
   if (MDFrom) {
     MDNode *Prof = MDFrom->getMetadata(LLVMContext::MD_prof);
     MDNode *Unpred = MDFrom->getMetadata(LLVMContext::MD_unpredictable);
-    Sel = addBranchMetadata(Sel, Prof, Unpred);
+    MDNode *Consist = MDFrom->getMetadata(LLVMContext::MD_consistent);
+    Sel = addBranchMetadata(Sel, Prof, Unpred, Consist);
   }
   if (isa<FPMathOperator>(Sel))
     setFPAttrs(Sel, nullptr /* MDNode* */, FMF);
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
index 2490b3012bdc2b4..ec0e2ec93d30e04 100644
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -56,6 +56,11 @@ MDNode *MDBuilder::createUnpredictable() {
   return MDNode::get(Context, std::nullopt);
 }
 
+MDNode *MDBuilder::createConsistent() {
+  auto *True = ConstantInt::getTrue(Context); // sole operand: i1 true
+  return MDNode::get(Context, createConstant(True));
+}
+
 MDNode *MDBuilder::createFunctionEntryCount(
     uint64_t Count, bool Synthetic,
     const DenseSet<GlobalValue::GUID> *Imports) {
diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
index da72e35a248eb29..10f7f6e9923dd80 100644
--- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -132,7 +132,11 @@ MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
     CC = AArch64CC::MI;
     break;
   }
-  return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
+  return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+                 MI.getFlag(MachineInstr::MIFlag::Consistent) &&
+                         TII->getSubtarget().hasHBC()
+                     ? TII->get(AArch64::BCcc)
+                     : TII->get(AArch64::Bcc))
       .addImm(CC)
       .addMBB(TargetMBB);
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 1c20e24e41d7eac..c412c8d76aed0c3 100644
--- a/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -149,7 +149,7 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
   if (Term == MBB->end())
     return nullptr;
 
-  if (Term->getOpcode() != AArch64::Bcc)
+  if (Term->getOpcode() != AArch64::Bcc && Term->getOpcode() != AArch64::BCcc)
     return nullptr;
 
   // Since we may modify cmp of this MBB, make sure NZCV does not live out.
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 9b8162ce8dd4d0f..76033c746c280f9 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2449,16 +2449,30 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
 
       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
       if (ExtraCC != AArch64CC::AL) {
+        if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC())
+          // For branches with consistent metadata emit conditional branches
+          // with a hint that it will behave very consistently if target
+          // supports HBC
+          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+                  TII.get(AArch64::BCcc))
+              .addImm(ExtraCC)
+              .addMBB(TBB);
+        else
+          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
+              .addImm(ExtraCC)
+              .addMBB(TBB);
+      }
+      // Emit the branch.
+      if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC()) {
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BCcc))
+            .addImm(CC)
+            .addMBB(TBB);
+      } else {
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
-            .addImm(ExtraCC)
+            .addImm(CC)
             .addMBB(TBB);
       }
 
-      // Emit the branch.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
-          .addImm(CC)
-          .addMBB(TBB);
-
       finishCondBranch(BI->getParent(), TBB, FBB);
       return true;
     }
@@ -2485,10 +2499,17 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
       if (!CondReg)
         return false;
 
-      // Emit the branch.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
-        .addImm(CC)
-        .addMBB(TBB);
+      if (BI->getMetadata(LLVMContext::MD_consistent) && Subtarget->hasHBC()) {
+        // Emit conditional branch with a consistent behaviour hint
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BCcc))
+            .addImm(CC)
+            .addMBB(TBB);
+      } else {
+        // Emit the branch.
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
+            .addImm(CC)
+            .addMBB(TBB);
+      }
 
       finishCondBranch(BI->getParent(), TBB, FBB);
       return true;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3bff2845b7a1342..945328d0e7e382d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2346,6 +2346,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::LOADgot)
     MAKE_CASE(AArch64ISD::RET_GLUE)
     MAKE_CASE(AArch64ISD::BRCOND)
+    MAKE_CASE(AArch64ISD::BRCCOND)
     MAKE_CASE(AArch64ISD::CSEL)
     MAKE_CASE(AArch64ISD::CSINV)
     MAKE_CASE(AArch64ISD::CSNEG)
@@ -8604,6 +8605,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue RHS = Op.getOperand(3);
   SDValue Dest = Op.getOperand(4);
   SDLoc dl(Op);
+  SDNodeFlags Flags;
+  bool IsConsistent = Op.getNode()->getFlags().hasConsistent();
+  Flags.setConsistent(IsConsistent);
+  unsigned BRCondOpc = IsConsistent && Subtarget->hasHBC() ? AArch64ISD::BRCCOND
+                                                           : AArch64ISD::BRCOND;
 
   MachineFunction &MF = DAG.getMachineFunction();
   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
@@ -8643,8 +8649,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
       OFCC = getInvertedCondCode(OFCC);
     SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
 
-    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
-                       Overflow);
+    SDValue Ops[] = {Chain, Dest, CCVal, Overflow};
+    return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags);
   }
 
   if (LHS.getValueType().isInteger()) {
@@ -8665,12 +8671,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
             isPowerOf2_64(LHS.getConstantOperandVal(1))) {
           SDValue Test = LHS.getOperand(0);
           uint64_t Mask = LHS.getConstantOperandVal(1);
-          return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
-                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
-                             Dest);
+          SDValue Ops[] = {Chain, Test,
+                           DAG.getConstant(Log2_64(Mask), dl, MVT::i64), Dest};
+          return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Ops, Flags);
         }
 
-        return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
+        return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest,
+                           Flags);
       } else if (CC == ISD::SETNE) {
         // See if we can use a TBZ to fold in an AND as well.
         // TBZ has a smaller branch displacement than CBZ.  If the offset is
@@ -8681,20 +8688,22 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
             isPowerOf2_64(LHS.getConstantOperandVal(1))) {
           SDValue Test = LHS.getOperand(0);
           uint64_t Mask = LHS.getConstantOperandVal(1);
-          return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
-                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
-                             Dest);
+          SDValue Ops[] = {Chain, Test,
+                           DAG.getConstant(Log2_64(Mask), dl, MVT::i64), Dest};
+          return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Ops, Flags);
         }
 
-        return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
+        return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest,
+                           Flags);
       } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
         // Don't combine AND since emitComparison converts the AND to an ANDS
         // (a.k.a. TST) and the test in the test bit and branch instruction
         // becomes redundant.  This would also increase register pressure.
         uint64_t SignBitPos;
         std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
-        return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
-                           DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
+        SDValue Ops[] = {Chain, LHS, DAG.getConstant(SignBitPos, dl, MVT::i64),
+                         Dest};
+        return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Ops, Flags);
       }
     }
     if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
@@ -8704,14 +8713,15 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
       // becomes redundant.  This would also increase register pressure.
       uint64_t SignBitPos;
       std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
-      return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
-                         DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
+      SDValue Ops[] = {Chain, LHS, DAG.getConstant(SignBitPos, dl, MVT::i64),
+                       Dest};
+      return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Ops, Flags);
     }
 
     SDValue CCVal;
     SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
-    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
-                       Cmp);
+    SDValue Ops[] = {Chain, Dest, CCVal, Cmp};
+    return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags);
   }
 
   assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
@@ -8723,12 +8733,12 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   AArch64CC::CondCode CC1, CC2;
   changeFPCCToAArch64CC(CC, CC1, CC2);
   SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
-  SDValue BR1 =
-      DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
+  SDValue BR1Ops[] = {Chain, Dest, CC1Val, Cmp};
+  SDValue BR1 = DAG.getNode(BRCondOpc, dl, MVT::Other, BR1Ops, Flags);
   if (CC2 != AArch64CC::AL) {
     SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
-    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
-                       Cmp);
+    SDValue Ops[] = {BR1, Dest, CC2Val, Cmp};
+    return DAG.getNode(BRCondOpc, dl, MVT::Other, Ops, Flags);
   }
 
   return BR1;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 7332a95615a4da5..a6968587a2f9612 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -72,6 +72,7 @@ enum NodeType : unsigned {
             // Offset Table, TLS record).
   RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
   BRCOND,   // Conditional branch instruction; "b.cond".
+  BRCCOND,  // Hinted Conditional Branch "BC.cond".
   CSEL,
   CSINV, // Conditional select invert.
   CSNEG, // Conditional select negate.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f88f5a240a1fd7f..a3015a31b83e7e9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -2089,10 +2089,10 @@ def am_brcond : Operand<OtherVT> {
   let OperandType = "OPERAND_PCREL";
 }
 
-class BranchCond<bit bit4, string mnemonic>
+class BranchCond<bit bit4, string mnemonic, SDNode node>
    : I<(outs), (ins ccode:$cond, am_brcond:$target),
        mnemonic, ".$cond\t$target", "",
-       [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> {
+       [(node bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> {
   let isBranch = 1;
   let isTerminator = 1;
   let Uses = [NZCV];
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6fdf5363bae2928..7a24b11fc2b8d10 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -180,6 +180,7 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
   default:
     llvm_unreachable("Unknown branch instruction?");
   case AArch64::Bcc:
+  case AArch64::BCcc:
     Target = LastInst->getOperand(1).getMBB();
     Cond.push_back(LastInst->getOperand(0));
     break;
@@ -221,6 +222,7 @@ static unsigned getBranchDisplacementBits(unsigned Opc) {
   case AArch64::CBZX:
     return CBZDisplacementBits;
   case AArch64::Bcc:
+  case AArch64::BCcc:
     return BCCDisplacementBits;
   }
 }
@@ -250,6 +252,7 @@ AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
   case AArch64::CBZX:
   case AArch64::CBNZX:
   case AArch64::Bcc:
+  case AArch64::BCcc:
     return MI.getOperand(1).getMBB();
   }
 }
@@ -535,7 +538,10 @@ bool AArch64InstrInfo::reverseBranchCondition(
 }
 
 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                        int *BytesRemoved) const {
+                                        int *BytesRemoved,
+                                        bool *IsConsistent) const {
+  if (IsConsistent)
+    *IsConsistent = false;
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
   if (I == MBB.end())
     return 0;
@@ -544,6 +550,10 @@ unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
       !isCondBranchOpcode(I->getOpcode()))
     return 0;
 
+  if (I->getOpcode() == AArch64::BCcc)
+    if (IsConsistent)
+      *IsConsistent = true;
+
   // Remove the branch.
   I->eraseFromParent();
 
@@ -561,6 +571,10 @@ unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
     return 1;
   }
 
+  if (I->getOpcode() == AArch64::BCcc)
+    if (IsConsistent)
+      *IsConsistent = true;
+
   // Remove the branch.
   I->eraseFromParent();
   if (BytesRemoved)
@@ -569,12 +583,16 @@ unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
   return 2;
 }
 
-void AArch64InstrInfo::instantiateCondBranch(
-    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
-    ArrayRef<MachineOperand> Cond) const {
+void AArch64InstrInfo::instantiateCondBranch(MachineBasicBlock &MBB,
+                                             const DebugLoc &DL,
+                                             MachineBasicBlock *TBB,
+                                             ArrayRef<MachineOperand> Cond,
+                                             bool IsConsistent) const {
   if (Cond[0].getImm() != -1) {
     // Regular Bcc
-    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
+    BuildMI(&MBB, DL, get(IsConsistent ? AArch64::BCcc : AArch64::Bcc))
+        .addImm(Cond[0].getImm())
+        .addMBB(TBB);
   } else {
     // Folded compare-and-branch
     // Note that we use addOperand instead of addReg to keep the flags.
@@ -586,9 +604,12 @@ void AArch64InstrInfo::instantiateCondBranch(
   }
 }
 
-unsigned AArch64InstrInfo::insertBranch(
-    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                        MachineBasicBlock *TBB,
+                                        MachineBasicBlock *FBB,
+                                        ArrayRef<MachineOperand> Cond,
+                                        const DebugLoc &DL, int *BytesAdded,
+                                        bool IsConsistent) const {
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
 
@@ -596,7 +617,7 @@ unsigned AArch64InstrInfo::insertBranch(
     if (Cond.empty()) // Unconditional branch?
       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
     else
-      instantiateCondBranch(MBB, DL, TBB, Cond);
+      instantiateCondBranch(MBB, DL, TBB, Cond, IsConsistent);
 
     if (BytesAdded)
       *BytesAdded = 4;
@@ -605,7 +626,7 @@ unsigned AArch64InstrInfo::insertBranch(
   }
 
   // Two-way conditional branch.
-  instantiateCondBranch(MBB, DL, TBB, Cond);
+  instantiateCondBranch(MBB, DL, TBB, Cond, IsConsistent);
   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
 
   if (BytesAdded)
@@ -1618,7 +1639,8 @@ findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
   default:
     return -1;
 
-  case AArch64::Bcc: {
+  case AArch64::Bcc:
+  case AArch64::BCcc: {
     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
     assert(Idx >= 2);
     return Idx - 2;
@@ -7873,6 +7895,7 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
   default:
     llvm_unreachable("Unknown branch instruction?");
   case AArch64::Bcc:
+  case AArch64::BCcc:
     return false;
   case AArch64::CBZW:
   case AArch64::CBZX:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index a934103c90cbf92..0a513aa5254d34d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -45,6 +45,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   /// always be able to get register info as well (through this method).
   const AArch64RegisterInfo &getRegisterInfo() const { return RI; }
 
+  const AArch64Subtarget &getSubtarget() const { return Subtarget; }
+
   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
 
   bool isAsCheapAsAMove(const MachineInstr &MI) const override;
@@ -238,12 +240,12 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                               MachineBranchPredicate &MBP,
                               bool AllowModify) const override;
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
   bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
@@ -403,7 +405,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
 
   void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                              MachineBasicBlock *TBB,
-                             ArrayRef<MachineOperand> Cond) const;
+                             ArrayRef<MachineOperand> Cond,
+                             bool IsConsistent) const;
   bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
                            const MachineRegisterInfo &MRI) const;
   bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
@@ -510,6 +513,7 @@ static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
 static inline bool isCondBranchOpcode(int Opc) {
   switch (Opc) {
   case AArch64::Bcc:
+  case AArch64::BCcc:
   case AArch64::CBZW:
   case AArch64::CBZX:
   case AArch64::CBNZW:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 290c79f7bacdb8f..198ae55aea21861 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -637,6 +637,8 @@ def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
 
 def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                 [SDNPHasChain]>;
+def AArch64brccond       : SDNode<"AArch64ISD::BRCCOND", SDT_AArch64Brcond,
+                                [SDNPHasChain]>;
 def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                 [SDNPHasChain]>;
 def AArch64cbnz           : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
@@ -2782,12 +2784,12 @@ def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
 //===----------------------------------------------------------------------===//
 // Conditional branch (immediate) instruction.
 //===----------------------------------------------------------------------===//
-def Bcc : BranchCond<0, "b">;
+def Bcc : BranchCond<0, "b", AArch64brcond>;
 
 // Armv8.8-A variant form which hints to the branch predictor that
 // this branch is very likely to go the same way nearly all the time
 // (even though it is not known at compile time _which_ way that is).
-def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
+def BCcc : BranchCond<1, "bc", AArch64brccond>, Requires<[HasHBC]>;
 
 //===----------------------------------------------------------------------===//
 // Compare-and-branch instructions.
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 05d60872bf51aca..a0b96f74a0289fe 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -21,7 +21,8 @@ using namespace llvm;
 /// CMN, CMP, TST followed by Bcc
 static bool isArithmeticBccPair(const MachineInstr *FirstMI,
                                 const MachineInstr &SecondMI, bool CmpOnly) {
-  if (SecondMI.getOpcode() != AArch64::Bcc)
+  if (SecondMI.getOpcode() != AArch64::Bcc &&
+      SecondMI.getOpcode() != AArch64::BCcc)
     return false;
 
   // Assume the 1st instr to be a wildcard if it is unspecified.
diff --git a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 1494312886a40de..0624a0b6b026460 100644
--- a/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -137,7 +137,7 @@ bool AArch64RedundantCopyElimination::knownRegValInBlock(
   }
 
   // Otherwise, must be a conditional branch.
-  if (Opc != AArch64::Bcc)
+  if (Opc != AArch64::Bcc && Opc != AArch64::BCcc)
     return false;
 
   // Must be an equality check (i.e., == or !=).
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index bdaae4dd724d536..796f75d040c4787 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1675,9 +1675,16 @@ bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
   AArch64CC::CondCode CC1, CC2;
   changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
-  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
-  if (CC2 != AArch64CC::AL)
-    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
+  if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC())
+    MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC1).addMBB(DestMBB);
+  else
+    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
+  if (CC2 != AArch64CC::AL) {
+    if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC())
+      MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC2).addMBB(DestMBB);
+    else
+      MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
+  }
   I.eraseFromParent();
   return true;
 }
@@ -1790,7 +1797,10 @@ bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
   emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
       static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
-  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
+  if (I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC())
+    MIB.buildInstr(AArch64::BCcc, {}, {}).addImm(CC).addMBB(DestMBB);
+  else
+    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
   I.eraseFromParent();
   return true;
 }
@@ -1821,9 +1831,12 @@ bool AArch64InstructionSelector::selectCompareBranch(
   auto TstMI =
       MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
-  auto Bcc = MIB.buildInstr(AArch64::Bcc)
-                 .addImm(AArch64CC::NE)
-                 .addMBB(I.getOperand(1).getMBB());
+  auto Bcc =
+      MIB.buildInstr(I.getFlag(MachineInstr::MIFlag::Consistent) && STI.hasHBC()
+                         ? AArch64::BCcc
+                         : AArch64::Bcc)
+          .addImm(AArch64CC::NE)
+          .addMBB(I.getOperand(1).getMBB());
   I.eraseFromParent();
   return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
 }
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 7f874b245b8f4f4..cf9e601a7307252 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -730,8 +730,8 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *TBB,
                                      MachineBasicBlock *FBB,
                                      ArrayRef<MachineOperand> Cond,
-                                     const DebugLoc &DL,
-                                     int *BytesAdded) const {
+                                     const DebugLoc &DL, int *BytesAdded,
+                                     bool IsConsistent) const {
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert(!BytesAdded && "code size not handled");
 
@@ -773,8 +773,8 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
   }
 }
 
-unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                     int *BytesRemoved) const {
+unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                     bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   // Note : we leave PRED* instructions there.
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index f720e4656348c83..9d9acb576bfeaf0 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -170,11 +170,11 @@ class R600InstrInfo final : public R600GenInstrInfo {
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   bool isPredicated(const MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 027b695c3bb1a74..a000b567c88c08c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3036,8 +3036,8 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
   return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
 }
 
-unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                   int *BytesRemoved) const {
+unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                   bool *IsConsistent) const {
   unsigned Count = 0;
   unsigned RemovedSize = 0;
   for (MachineInstr &MI : llvm::make_early_inc_range(MBB.terminators())) {
@@ -3066,8 +3066,8 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *TBB,
                                    MachineBasicBlock *FBB,
                                    ArrayRef<MachineOperand> Cond,
-                                   const DebugLoc &DL,
-                                   int *BytesAdded) const {
+                                   const DebugLoc &DL, int *BytesAdded,
+                                   bool IsConsistent) const {
   if (!FBB && Cond.empty()) {
     BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
       .addMBB(TBB);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 29f549fc29a3ce6..aefd6f4fadfc6b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -338,13 +338,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   bool reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
index fe78a98837cf970..bceb525927f3c85 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
@@ -251,8 +251,8 @@ bool ARCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return false;
 }
 
-unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                    int *BytesRemoved) const {
+unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                    bool *IsConsistent) const {
   assert(!BytesRemoved && "Code size not handled");
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
   if (I == MBB.end())
@@ -370,7 +370,8 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
-                                    const DebugLoc &DL, int *BytesAdded) const {
+                                    const DebugLoc &DL, int *BytesAdded,
+                                    bool IsConsistent) const {
   assert(!BytesAdded && "Code size not handled.");
 
   // Shouldn't be a fall through.
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h
index c55c9535ec296b8..031fbcd5d048715 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.h
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.h
@@ -57,11 +57,11 @@ class ARCInstrInfo : public ARCGenInstrInfo {
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    const DebugLoc &, MCRegister DestReg, MCRegister SrcReg,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 4c78379ccf5c467..d249b165dfc1f68 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -469,7 +469,8 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 }
 
 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                        int *BytesRemoved) const {
+                                        int *BytesRemoved,
+                                        bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
@@ -499,8 +500,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                         MachineBasicBlock *TBB,
                                         MachineBasicBlock *FBB,
                                         ArrayRef<MachineOperand> Cond,
-                                        const DebugLoc &DL,
-                                        int *BytesAdded) const {
+                                        const DebugLoc &DL, int *BytesAdded,
+                                        bool IsConsistent) const {
   assert(!BytesAdded && "code size not handled");
   ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
   int BOpc   = !AFI->isThumbFunction()
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 5efcc1a0d9fc073..9fc6b12b52ce89f 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -143,12 +143,12 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index 2640ad9e3626739..3829706dd8e5031 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -398,7 +398,8 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
-                                    const DebugLoc &DL, int *BytesAdded) const {
+                                    const DebugLoc &DL, int *BytesAdded,
+                                    bool IsConsistent) const {
   if (BytesAdded)
     *BytesAdded = 0;
 
@@ -435,8 +436,8 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
   return Count;
 }
 
-unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                    int *BytesRemoved) const {
+unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                    bool *IsConsistent) const {
   if (BytesRemoved)
     *BytesRemoved = 0;
 
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h
index 290177f5eec6665..209b244b786aa32 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -99,10 +99,10 @@ class AVRInstrInfo : public AVRGenInstrInfo {
                      bool AllowModify = false) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
 
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 2209f1f1462b43f..bc852e61d033f94 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -221,8 +221,8 @@ unsigned BPFInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
-                                    const DebugLoc &DL,
-                                    int *BytesAdded) const {
+                                    const DebugLoc &DL, int *BytesAdded,
+                                    bool IsConsistent) const {
   assert(!BytesAdded && "code size not handled");
 
   // Shouldn't be a fall through.
@@ -238,8 +238,8 @@ unsigned BPFInstrInfo::insertBranch(MachineBasicBlock &MBB,
   llvm_unreachable("Unexpected conditional branch");
 }
 
-unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                    int *BytesRemoved) const {
+unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                    bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator I = MBB.end();
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.h b/llvm/lib/Target/BPF/BPFInstrInfo.h
index 354aca1bd2f93b4..d9e40493d267d3e 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.h
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -52,12 +52,13 @@ class BPFInstrInfo : public BPFGenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
+
 private:
   void expandMEMCPY(MachineBasicBlock::iterator) const;
 
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index e5581bcdc397580..3b2f7bc722bc19e 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -110,8 +110,8 @@ bool CSKYInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return true;
 }
 
-unsigned CSKYInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                     int *BytesRemoved) const {
+unsigned CSKYInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                     bool *IsConsistent) const {
   if (BytesRemoved)
     *BytesRemoved = 0;
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
@@ -151,9 +151,12 @@ CSKYInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
   return MI.getOperand(NumOp - 1).getMBB();
 }
 
-unsigned CSKYInstrInfo::insertBranch(
-    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+unsigned CSKYInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *TBB,
+                                     MachineBasicBlock *FBB,
+                                     ArrayRef<MachineOperand> Cond,
+                                     const DebugLoc &DL, int *BytesAdded,
+                                     bool IsConsistent) const {
   if (BytesAdded)
     *BytesAdded = 0;
 
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
index dbb69a7a8798080..5a44544b77841cb 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -59,16 +59,16 @@ class CSKYInstrInfo : public CSKYGenInstrInfo {
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 6f0210763bc5f35..a4bc553e82d8555 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -603,7 +603,8 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 }
 
 unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                        int *BytesRemoved) const {
+                                        int *BytesRemoved,
+                                        bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   LLVM_DEBUG(dbgs() << "\nRemoving branches out of " << printMBBReference(MBB));
@@ -629,9 +630,9 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                         MachineBasicBlock *TBB,
                                         MachineBasicBlock *FBB,
                                         ArrayRef<MachineOperand> Cond,
-                                        const DebugLoc &DL,
-                                        int *BytesAdded) const {
-  unsigned BOpc   = Hexagon::J2_jump;
+                                        const DebugLoc &DL, int *BytesAdded,
+                                        bool IsConsistent) const {
+  unsigned BOpc = Hexagon::J2_jump;
   unsigned BccOpc = Hexagon::J2_jumpt;
   assert(validateBranchCond(Cond) && "Invalid branching condition");
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 0bc0877f6e70670..b08cc389d2fcaf7 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -111,8 +111,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
   /// Remove the branching code at the end of the specific MBB.
   /// This is only invoked in cases where analyzeBranch returns success. It
   /// returns the number of instructions that were removed.
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   /// Insert branch code into the end of the specified MachineBasicBlock.
   /// The operands to this method are the same as those
@@ -126,8 +126,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
   /// merging needs to be disabled.
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   /// Analyze loop L, which must be a single-basic-block loop, and if the
   /// conditions can be understood enough produce a PipelinerLoopInfo object.
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index aa7e8846406dd85..02e1fe3d5d57641 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -657,8 +657,8 @@ unsigned LanaiInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                       MachineBasicBlock *TrueBlock,
                                       MachineBasicBlock *FalseBlock,
                                       ArrayRef<MachineOperand> Condition,
-                                      const DebugLoc &DL,
-                                      int *BytesAdded) const {
+                                      const DebugLoc &DL, int *BytesAdded,
+                                      bool IsConsistent) const {
   // Shouldn't be a fall through.
   assert(TrueBlock && "insertBranch must not be told to insert a fallthrough");
   assert(!BytesAdded && "code size not handled");
@@ -685,8 +685,8 @@ unsigned LanaiInstrInfo::insertBranch(MachineBasicBlock &MBB,
   return 2;
 }
 
-unsigned LanaiInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                      int *BytesRemoved) const {
+unsigned LanaiInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                      bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator Instruction = MBB.end();
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index 62f6240c6e4681f..54a4e396fc70a3b 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -89,8 +89,8 @@ class LanaiInstrInfo : public LanaiGenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Condition,
                      bool AllowModify) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   // For a comparison instruction, return the source registers in SrcReg and
   // SrcReg2 if having two register operands, and the value it compares against
@@ -138,9 +138,9 @@ class LanaiInstrInfo : public LanaiGenInstrInfo {
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TrueBlock,
                         MachineBasicBlock *FalseBlock,
-                        ArrayRef<MachineOperand> Condition,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        ArrayRef<MachineOperand> Condition, const DebugLoc &DL,
+                        int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 };
 
 static inline bool isSPLSOpcode(unsigned Opcode) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index ddd1c9943fac016..fd193c9680fbaf3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -325,7 +325,8 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
 }
 
 unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                          int *BytesRemoved) const {
+                                          int *BytesRemoved,
+                                          bool *IsConsistent) const {
   if (BytesRemoved)
     *BytesRemoved = 0;
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
@@ -357,9 +358,12 @@ unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB,
 
 // Inserts a branch into the end of the specific MachineBasicBlock, returning
 // the number of instructions inserted.
-unsigned LoongArchInstrInfo::insertBranch(
-    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+unsigned LoongArchInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                          MachineBasicBlock *TBB,
+                                          MachineBasicBlock *FBB,
+                                          ArrayRef<MachineOperand> Cond,
+                                          const DebugLoc &DL, int *BytesAdded,
+                                          bool IsConsistent) const {
   if (BytesAdded)
     *BytesAdded = 0;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index 4b145d0baa4171e..2d198447cb4a6b7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -62,13 +62,13 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &dl,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &dl, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   void insertIndirectBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock &NewDestBB,
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index d56fef9e9029af8..aad64fc72c411be 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -254,8 +254,8 @@ bool M68kInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, AllowModify);
 }
 
-unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                     int *BytesRemoved) const {
+unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                     bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator I = MBB.end();
@@ -277,9 +277,12 @@ unsigned M68kInstrInfo::removeBranch(MachineBasicBlock &MBB,
   return Count;
 }
 
-unsigned M68kInstrInfo::insertBranch(
-    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+unsigned M68kInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *TBB,
+                                     MachineBasicBlock *FBB,
+                                     ArrayRef<MachineOperand> Cond,
+                                     const DebugLoc &DL, int *BytesAdded,
+                                     bool IsConsistent) const {
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h
index 577967f2fdfc97d..afa2faf71de6aa6 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.h
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.h
@@ -261,13 +261,13 @@ class M68kInstrInfo : public M68kGenInstrInfo {
                          SmallVectorImpl<MachineOperand> &Cond,
                          bool AllowModify) const;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 74057165166439f..fb983245c603943 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -104,7 +104,8 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 }
 
 unsigned MSP430InstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                       int *BytesRemoved) const {
+                                       int *BytesRemoved,
+                                       bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator I = MBB.end();
@@ -254,8 +255,8 @@ unsigned MSP430InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
-                                       const DebugLoc &DL,
-                                       int *BytesAdded) const {
+                                       const DebugLoc &DL, int *BytesAdded,
+                                       bool IsConsistent) const {
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index b8d015a21cd1506..397e74da63b2248 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -61,12 +61,12 @@ class MSP430InstrInfo : public MSP430GenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   int64_t getFramePoppedByCallee(const MachineInstr &I) const {
     assert(isFrameInstr(I) && "Not a frame instruction");
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 392cc15d7943afa..99addbd15d7b169 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -135,8 +135,8 @@ unsigned MipsInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *TBB,
                                      MachineBasicBlock *FBB,
                                      ArrayRef<MachineOperand> Cond,
-                                     const DebugLoc &DL,
-                                     int *BytesAdded) const {
+                                     const DebugLoc &DL, int *BytesAdded,
+                                     bool IsConsistent) const {
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert(!BytesAdded && "code size not handled");
@@ -165,8 +165,8 @@ unsigned MipsInstrInfo::insertBranch(MachineBasicBlock &MBB,
   return 1;
 }
 
-unsigned MipsInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                     int *BytesRemoved) const {
+unsigned MipsInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                     bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h
index dc4b9d99b39d2a3..c193ba2d560f2e2 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -65,13 +65,13 @@ class MipsInstrInfo : public MipsGenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index b0d792b5ee3fe69..e1690c6306280c3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -146,8 +146,8 @@ bool NVPTXInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return true;
 }
 
-unsigned NVPTXInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                      int *BytesRemoved) const {
+unsigned NVPTXInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                      bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin())
@@ -176,8 +176,8 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                       MachineBasicBlock *TBB,
                                       MachineBasicBlock *FBB,
                                       ArrayRef<MachineOperand> Cond,
-                                      const DebugLoc &DL,
-                                      int *BytesAdded) const {
+                                      const DebugLoc &DL, int *BytesAdded,
+                                      bool IsConsistent) const {
   assert(!BytesAdded && "code size not handled");
 
   // Shouldn't be a fall through.
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index cd068a0939300e7..69a6d919c62142b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -60,12 +60,12 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index e1bb2f72657f8f0..4f70c6d32fd2348 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1440,8 +1440,8 @@ bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return true;
 }
 
-unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                    int *BytesRemoved) const {
+unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                    bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
@@ -1476,8 +1476,8 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
-                                    const DebugLoc &DL,
-                                    int *BytesAdded) const {
+                                    const DebugLoc &DL, int *BytesAdded,
+                                    bool IsConsistent) const {
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 2 || Cond.size() == 0) &&
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 1f59e994d9cb1ad..50e476258388d64 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -396,12 +396,12 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   // Select analysis.
   bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 9271f807a84838b..c8a0db51b270f43 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1018,8 +1018,8 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return true;
 }
 
-unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                      int *BytesRemoved) const {
+unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                      bool *IsConsistent) const {
   if (BytesRemoved)
     *BytesRemoved = 0;
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
@@ -1052,9 +1052,12 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
 
 // Inserts a branch into the end of the specific MachineBasicBlock, returning
 // the number of instructions inserted.
-unsigned RISCVInstrInfo::insertBranch(
-    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+unsigned RISCVInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                      MachineBasicBlock *TBB,
+                                      MachineBasicBlock *FBB,
+                                      ArrayRef<MachineOperand> Cond,
+                                      const DebugLoc &DL, int *BytesAdded,
+                                      bool IsConsistent) const {
   if (BytesAdded)
     *BytesAdded = 0;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index b33d8c28561596b..96b14fb662d0810 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -108,16 +108,16 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &dl,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &dl, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   void insertIndirectBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock &NewDestBB,
                             MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                             int64_t BrOffset, RegScavenger *RS) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
index 42317453a2370e1..5c59295ef21a77e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
@@ -200,8 +200,8 @@ bool SPIRVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 // returns the number of instructions that were removed.
 // If \p BytesRemoved is non-null, report the change in code size from the
 // removed instructions.
-unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                      int *BytesRemoved) const {
+unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                      bool *IsConsistent) const {
   report_fatal_error("Branch removal not supported, as MBB info not propagated"
                      " to OpPhi instructions. Try using -O0 instead.");
 }
@@ -219,9 +219,12 @@ unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB,
 //
 // The CFG information in MBB.Predecessors and MBB.Successors must be valid
 // before calling this function.
-unsigned SPIRVInstrInfo::insertBranch(
-    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+unsigned SPIRVInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                      MachineBasicBlock *TBB,
+                                      MachineBasicBlock *FBB,
+                                      ArrayRef<MachineOperand> Cond,
+                                      const DebugLoc &DL, int *BytesAdded,
+                                      bool IsConsistent) const {
   report_fatal_error("Branch insertion not supported, as MBB info not "
                      "propagated to OpPhi instructions. Try using "
                      "-O0 instead.");
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
index c01e30e109bd5b9..706846716ef9367 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
@@ -41,13 +41,13 @@ class SPIRVInstrInfo : public SPIRVGenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc) const override;
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 90662cd87dcf12f..db2236dca290cc5 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -328,8 +328,8 @@ unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                       MachineBasicBlock *TBB,
                                       MachineBasicBlock *FBB,
                                       ArrayRef<MachineOperand> Cond,
-                                      const DebugLoc &DL,
-                                      int *BytesAdded) const {
+                                      const DebugLoc &DL, int *BytesAdded,
+                                      bool IsConsistent) const {
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert((Cond.size() <= 3) &&
          "Sparc branch conditions should have at most three components!");
@@ -364,8 +364,8 @@ unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB,
   return 2;
 }
 
-unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                      int *BytesRemoved) const {
+unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                      bool *IsConsistent) const {
   MachineBasicBlock::iterator I = MBB.end();
   unsigned Count = 0;
   int Removed = 0;
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h
index 7056d6babe17b93..470199b3c209321 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -71,13 +71,13 @@ class SparcInstrInfo : public SparcGenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index ac8c395f9064fb8..8377471ae85d844 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -447,7 +447,8 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 }
 
 unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                        int *BytesRemoved) const {
+                                        int *BytesRemoved,
+                                        bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   // Most of the code and comments here are boilerplate.
@@ -482,8 +483,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                         MachineBasicBlock *TBB,
                                         MachineBasicBlock *FBB,
                                         ArrayRef<MachineOperand> Cond,
-                                        const DebugLoc &DL,
-                                        int *BytesAdded) const {
+                                        const DebugLoc &DL, int *BytesAdded,
+                                        bool IsConsistent) const {
   // In this function we output 32-bit branches, which should always
   // have enough range.  They can be shortened and relaxed by later code
   // in the pipeline, if desired.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index bb883ea464d376f..a2c8ecf344c5a04 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -238,12 +238,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
   bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                       Register &SrcReg2, int64_t &Mask,
                       int64_t &Value) const override;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index ebb9e21389c37b2..620e8ec0e7e4b67 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -227,7 +227,8 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *TBB,
                                    MachineBasicBlock *FBB,
                                    ArrayRef<MachineOperand> Cond,
-                                   const DebugLoc &DL, int *BytesAdded) const {
+                                   const DebugLoc &DL, int *BytesAdded,
+                                   bool IsConsistent) const {
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 3 || Cond.size() == 0) &&
          "VE branch conditions should have three component!");
@@ -288,8 +289,8 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB,
   return 2;
 }
 
-unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                   int *BytesRemoved) const {
+unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                   bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator I = MBB.end();
diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h
index 4fe56f24116f8cc..4b68534178c5018 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/llvm/lib/Target/VE/VEInstrInfo.h
@@ -67,13 +67,13 @@ class VEInstrInfo : public VEGenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 32a4accd040ebe9..6f4369a45b27def 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -135,7 +135,8 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 }
 
 unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                            int *BytesRemoved) const {
+                                            int *BytesRemoved,
+                                            bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::instr_iterator I = MBB.instr_end();
@@ -156,9 +157,12 @@ unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB,
   return Count;
 }
 
-unsigned WebAssemblyInstrInfo::insertBranch(
-    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                            MachineBasicBlock *TBB,
+                                            MachineBasicBlock *FBB,
+                                            ArrayRef<MachineOperand> Cond,
+                                            const DebugLoc &DL, int *BytesAdded,
+                                            bool IsConsistent) const {
   assert(!BytesAdded && "code size not handled");
 
   if (Cond.empty()) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index c1e1a790c60e2cd..7b6362b9dd36f1c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -56,12 +56,12 @@ class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo {
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 56e3ac79b5957a1..fd8fe2bbaed513c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3373,8 +3373,8 @@ bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
   return true;
 }
 
-unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
-                                    int *BytesRemoved) const {
+unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                    bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator I = MBB.end();
@@ -3400,8 +3400,8 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
-                                    const DebugLoc &DL,
-                                    int *BytesAdded) const {
+                                    const DebugLoc &DL, int *BytesAdded,
+                                    bool IsConsistent) const {
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index b0a2d2b89074348..c27713a95bc43ca 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -360,12 +360,12 @@ class X86InstrInfo final : public X86GenInstrInfo {
                               TargetInstrInfo::MachineBranchPredicate &MBP,
                               bool AllowModify = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
   bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
                        Register, Register, Register, int &, int &,
                        int &) const override;
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index d8a8e2cddf15411..dbfffb711f2bfd3 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -272,8 +272,8 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                       MachineBasicBlock *TBB,
                                       MachineBasicBlock *FBB,
                                       ArrayRef<MachineOperand> Cond,
-                                      const DebugLoc &DL,
-                                      int *BytesAdded) const {
+                                      const DebugLoc &DL, int *BytesAdded,
+                                      bool IsConsistent) const {
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 2 || Cond.size() == 0) &&
@@ -302,8 +302,8 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB,
   return 2;
 }
 
-unsigned
-XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const {
+unsigned XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved,
+                                      bool *IsConsistent) const {
   assert(!BytesRemoved && "code size not handled");
 
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.h b/llvm/lib/Target/XCore/XCoreInstrInfo.h
index 9bf7e2dcccb7d73..ece4bfa0f23d908 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -56,11 +56,11 @@ class XCoreInstrInfo : public XCoreGenInstrInfo {
 
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL,
-                        int *BytesAdded = nullptr) const override;
+                        const DebugLoc &DL, int *BytesAdded = nullptr,
+                        bool IsConsistent = false) const override;
 
-  unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemoved = nullptr) const override;
+  unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr,
+                        bool *IsConsistent = nullptr) const override;
 
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6009558efca06af..29a41d012849e72 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5401,7 +5401,6 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
       !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
 
   auto *BB = SI->getParent();
-
   // Partition the cases into two sets with different destinations.
   BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
   BasicBlock *DestB = nullptr;
@@ -5465,7 +5464,9 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
     Cmp = ConstantInt::getTrue(SI->getContext());
   else
     Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
-  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
+  BranchInst *NewBI =
+      Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest, nullptr, nullptr,
+                           SI->getMetadata(LLVMContext::MD_consistent));
 
   // Update weight for the newly-created conditional branch.
   if (hasBranchWeightMD(*SI)) {
@@ -6675,8 +6676,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   } else {
     Value *Cmp = Builder.CreateICmpULT(
         TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
-    RangeCheckBranch =
-        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+    RangeCheckBranch = Builder.CreateCondBr(
+        Cmp, LookupBB, SI->getDefaultDest(), nullptr, nullptr,
+        SI->getMetadata(LLVMContext::MD_consistent));
     if (DTU)
       Updates.push_back({DominatorTree::Insert, BB, LookupBB});
   }
diff --git a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll
index dc00c41892ba897..08beaa492d68906 100644
--- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll
+++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -debugify-and-strip-all-safe < %s -O3 -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s
+; RUN: llc -debugify-and-strip-all-safe < %s -O3 -mtriple=aarch64-eabi -mattr=+hbc -verify-machineinstrs | FileCheck %s -check-prefix=HBC
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-linaro-linux-gnueabi"
@@ -13,9 +14,16 @@ define void @test_add_cbz(i32 %a, i32 %b, ptr %ptr) {
 ; CHECK-NEXT:    cset w8, eq
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_add_cbz:
+; HBC:       // %bb.0: // %common.ret
+; HBC-NEXT:    cmn w0, w1
+; HBC-NEXT:    cset w8, eq
+; HBC-NEXT:    str w8, [x2]
+; HBC-NEXT:    ret
   %c = add nsw i32 %a, %b
   %d = icmp ne i32 %c, 0
-  br i1 %d, label %L1, label %L2
+  br i1 %d, label %L1, label %L2, !consistent !10
 L1:
   store i32 0, ptr %ptr, align 4
   ret void
@@ -32,9 +40,18 @@ define void @test_add_cbz_multiple_use(i32 %a, i32 %b, ptr %ptr) {
 ; CHECK-NEXT:    csel w8, w9, w8, ne
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_add_cbz_multiple_use:
+; HBC:       // %bb.0: // %common.ret
+; HBC-NEXT:    adds w8, w0, w1
+; HBC-NEXT:    mov w9, #10 // =0xa
+; HBC-NEXT:    csel w8, w9, w8, ne
+; HBC-NEXT:    str w8, [x2]
+; HBC-NEXT:    ret
   %c = add nsw i32 %a, %b
   %d = icmp ne i32 %c, 0
-  br i1 %d, label %L1, label %L2
+  br i1 %d, label %L1, label %L2, !consistent !10
+
 L1:
   store i32 10, ptr %ptr, align 4
   ret void
@@ -50,9 +67,16 @@ define void @test_add_cbz_64(i64 %a, i64 %b, ptr %ptr) {
 ; CHECK-NEXT:    cset w8, eq
 ; CHECK-NEXT:    str x8, [x2]
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_add_cbz_64:
+; HBC:       // %bb.0: // %common.ret
+; HBC-NEXT:    cmn x0, x1
+; HBC-NEXT:    cset w8, eq
+; HBC-NEXT:    str x8, [x2]
+; HBC-NEXT:    ret
   %c = add nsw i64 %a, %b
   %d = icmp ne i64 %c, 0
-  br i1 %d, label %L1, label %L2
+  br i1 %d, label %L1, label %L2, !consistent !10
 L1:
   store i64 0, ptr %ptr, align 4
   ret void
@@ -68,9 +92,16 @@ define void @test_and_cbz(i32 %a, ptr %ptr) {
 ; CHECK-NEXT:    cset w8, eq
 ; CHECK-NEXT:    str w8, [x1]
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_and_cbz:
+; HBC:       // %bb.0: // %common.ret
+; HBC-NEXT:    tst w0, #0x6
+; HBC-NEXT:    cset w8, eq
+; HBC-NEXT:    str w8, [x1]
+; HBC-NEXT:    ret
   %c = and i32 %a, 6
   %d = icmp ne i32 %c, 0
-  br i1 %d, label %L1, label %L2
+  br i1 %d, label %L1, label %L2, !consistent !10
 L1:
   store i32 0, ptr %ptr, align 4
   ret void
@@ -86,9 +117,16 @@ define void @test_bic_cbnz(i32 %a, i32 %b, ptr %ptr) {
 ; CHECK-NEXT:    cset w8, ne
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_bic_cbnz:
+; HBC:       // %bb.0: // %common.ret
+; HBC-NEXT:    bics wzr, w1, w0
+; HBC-NEXT:    cset w8, ne
+; HBC-NEXT:    str w8, [x2]
+; HBC-NEXT:    ret
   %c = and i32 %a, %b
   %d = icmp eq i32 %c, %b
-  br i1 %d, label %L1, label %L2
+  br i1 %d, label %L1, label %L2, !consistent !10
 L1:
   store i32 0, ptr %ptr, align 4
   ret void
@@ -106,10 +144,19 @@ define void @test_add_tbz(i32 %a, i32 %b, ptr %ptr) {
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:  .LBB5_2: // %L2
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_add_tbz:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    adds w8, w0, w1
+; HBC-NEXT:    bc.pl .LBB5_2
+; HBC-NEXT:  // %bb.1: // %L1
+; HBC-NEXT:    str w8, [x2]
+; HBC-NEXT:  .LBB5_2: // %L2
+; HBC-NEXT:    ret
 entry:
   %add = add nsw i32 %a, %b
   %cmp36 = icmp sge i32 %add, 0
-  br i1 %cmp36, label %L2, label %L1
+  br i1 %cmp36, label %L2, label %L1, !consistent !10
 L1:
   store i32 %add, ptr %ptr, align 8
   br label %L2
@@ -126,10 +173,19 @@ define void @test_subs_tbz(i32 %a, i32 %b, ptr %ptr) {
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:  .LBB6_2: // %L2
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_subs_tbz:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    subs w8, w0, w1
+; HBC-NEXT:    bc.pl .LBB6_2
+; HBC-NEXT:  // %bb.1: // %L1
+; HBC-NEXT:    str w8, [x2]
+; HBC-NEXT:  .LBB6_2: // %L2
+; HBC-NEXT:    ret
 entry:
   %sub = sub nsw i32 %a, %b
   %cmp36 = icmp sge i32 %sub, 0
-  br i1 %cmp36, label %L2, label %L1
+  br i1 %cmp36, label %L2, label %L1, !consistent !10
 L1:
   store i32 %sub, ptr %ptr, align 8
   br label %L2
@@ -146,10 +202,19 @@ define void @test_add_tbnz(i32 %a, i32 %b, ptr %ptr) {
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:  .LBB7_2: // %L2
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_add_tbnz:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    adds w8, w0, w1
+; HBC-NEXT:    bc.mi .LBB7_2
+; HBC-NEXT:  // %bb.1: // %L1
+; HBC-NEXT:    str w8, [x2]
+; HBC-NEXT:  .LBB7_2: // %L2
+; HBC-NEXT:    ret
 entry:
   %add = add nsw i32 %a, %b
   %cmp36 = icmp slt i32 %add, 0
-  br i1 %cmp36, label %L2, label %L1
+  br i1 %cmp36, label %L2, label %L1, !consistent !10
 L1:
   store i32 %add, ptr %ptr, align 8
   br label %L2
@@ -166,10 +231,19 @@ define void @test_subs_tbnz(i32 %a, i32 %b, ptr %ptr) {
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:  .LBB8_2: // %L2
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: test_subs_tbnz:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    subs w8, w0, w1
+; HBC-NEXT:    bc.mi .LBB8_2
+; HBC-NEXT:  // %bb.1: // %L1
+; HBC-NEXT:    str w8, [x2]
+; HBC-NEXT:  .LBB8_2: // %L2
+; HBC-NEXT:    ret
 entry:
   %sub = sub nsw i32 %a, %b
   %cmp36 = icmp slt i32 %sub, 0
-  br i1 %cmp36, label %L2, label %L1
+  br i1 %cmp36, label %L2, label %L1, !consistent !10
 L1:
   store i32 %sub, ptr %ptr, align 8
   br label %L2
@@ -202,11 +276,32 @@ define void @test_call_clobber(i32 %unused, i32 %a) uwtable {
 ; CHECK-NEXT:  .LBB9_2: // %if.then
 ; CHECK-NEXT:    .cfi_restore_state
 ; CHECK-NEXT:    bl foo
+;
+; HBC-LABEL: test_call_clobber:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; HBC-NEXT:    .cfi_def_cfa_offset 16
+; HBC-NEXT:    .cfi_offset w19, -8
+; HBC-NEXT:    .cfi_offset w30, -16
+; HBC-NEXT:    .cfi_remember_state
+; HBC-NEXT:    and w19, w1, #0x6
+; HBC-NEXT:    mov w0, w19
+; HBC-NEXT:    bl bar
+; HBC-NEXT:    cbnz w19, .LBB9_2
+; HBC-NEXT:  // %bb.1: // %if.end
+; HBC-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; HBC-NEXT:    .cfi_def_cfa_offset 0
+; HBC-NEXT:    .cfi_restore w19
+; HBC-NEXT:    .cfi_restore w30
+; HBC-NEXT:    ret
+; HBC-NEXT:  .LBB9_2: // %if.then
+; HBC-NEXT:    .cfi_restore_state
+; HBC-NEXT:    bl foo
 entry:
   %c = and i32 %a, 6
   call void @bar(i32 %c)
   %tobool = icmp eq i32 %c, 0
-  br i1 %tobool, label %if.end, label %if.then
+  br i1 %tobool, label %if.end, label %if.then, !consistent !10
 
 if.then:
   tail call void @foo()
@@ -215,3 +310,5 @@ if.then:
 if.end:
   ret void
 }
+
+!10 = !{i1 true}
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index b63d540fb8e0291..05d10dfb1a3226c 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+hbc -O3 < %s | FileCheck %s -check-prefix=HBC
 
 define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) {
 ; CHECK-LABEL: loop1:
@@ -68,16 +69,83 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    b.ne .LBB0_7
 ; CHECK-NEXT:  .LBB0_8: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: loop1:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    subs w8, w2, #1
+; HBC-NEXT:    bc.lt .LBB0_8
+; HBC-NEXT:  // %bb.1: // %for.body.preheader
+; HBC-NEXT:    cmp w8, #6
+; HBC-NEXT:    bc.hi .LBB0_3
+; HBC-NEXT:  // %bb.2:
+; HBC-NEXT:    mov w10, wzr
+; HBC-NEXT:    mov x8, x1
+; HBC-NEXT:    mov x9, x0
+; HBC-NEXT:    b .LBB0_6
+; HBC-NEXT:  .LBB0_3: // %vector.ph
+; HBC-NEXT:    add x11, x8, #1
+; HBC-NEXT:    mov w8, #1132396544 // =0x437f0000
+; HBC-NEXT:    add x12, x0, #4
+; HBC-NEXT:    and x10, x11, #0x1fffffff8
+; HBC-NEXT:    dup v0.4s, w8
+; HBC-NEXT:    add x13, x1, #16
+; HBC-NEXT:    add x8, x1, x10, lsl #2
+; HBC-NEXT:    add x9, x0, x10
+; HBC-NEXT:    mov x14, x10
+; HBC-NEXT:  .LBB0_4: // %vector.body
+; HBC-NEXT:    // =>This Inner Loop Header: Depth=1
+; HBC-NEXT:    ldp q1, q2, [x13, #-16]
+; HBC-NEXT:    subs x14, x14, #8
+; HBC-NEXT:    add x13, x13, #32
+; HBC-NEXT:    fcmgt v3.4s, v1.4s, v0.4s
+; HBC-NEXT:    fcmgt v4.4s, v2.4s, v0.4s
+; HBC-NEXT:    fcmlt v5.4s, v1.4s, #0.0
+; HBC-NEXT:    fcmlt v6.4s, v2.4s, #0.0
+; HBC-NEXT:    bit v1.16b, v0.16b, v3.16b
+; HBC-NEXT:    bit v2.16b, v0.16b, v4.16b
+; HBC-NEXT:    bic v1.16b, v1.16b, v5.16b
+; HBC-NEXT:    bic v2.16b, v2.16b, v6.16b
+; HBC-NEXT:    fcvtzs v1.4s, v1.4s
+; HBC-NEXT:    fcvtzs v2.4s, v2.4s
+; HBC-NEXT:    xtn v1.4h, v1.4s
+; HBC-NEXT:    xtn v2.4h, v2.4s
+; HBC-NEXT:    xtn v1.8b, v1.8h
+; HBC-NEXT:    xtn v2.8b, v2.8h
+; HBC-NEXT:    mov v1.s[1], v2.s[0]
+; HBC-NEXT:    stur d1, [x12, #-4]
+; HBC-NEXT:    add x12, x12, #8
+; HBC-NEXT:    bc.ne .LBB0_4
+; HBC-NEXT:  // %bb.5: // %middle.block
+; HBC-NEXT:    cmp x11, x10
+; HBC-NEXT:    bc.eq .LBB0_8
+; HBC-NEXT:  .LBB0_6: // %for.body.preheader1
+; HBC-NEXT:    movi d0, #0000000000000000
+; HBC-NEXT:    sub w10, w2, w10
+; HBC-NEXT:    mov w11, #1132396544 // =0x437f0000
+; HBC-NEXT:  .LBB0_7: // %for.body
+; HBC-NEXT:    // =>This Inner Loop Header: Depth=1
+; HBC-NEXT:    fmov s2, w11
+; HBC-NEXT:    ldr s1, [x8], #4
+; HBC-NEXT:    fcmp s1, s2
+; HBC-NEXT:    fcsel s2, s2, s1, gt
+; HBC-NEXT:    fcmp s1, #0.0
+; HBC-NEXT:    fcsel s1, s0, s2, mi
+; HBC-NEXT:    subs w10, w10, #1
+; HBC-NEXT:    fcvtzs w12, s1
+; HBC-NEXT:    strb w12, [x9], #1
+; HBC-NEXT:    bc.ne .LBB0_7
+; HBC-NEXT:  .LBB0_8: // %for.cond.cleanup
+; HBC-NEXT:    ret
 entry:
   %cmp9 = icmp sgt i32 %width, 0
-  br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup
+  br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup, !consistent !10
 
 for.body.preheader:                               ; preds = %entry
   %0 = add i32 %width, -1
   %1 = zext i32 %0 to i64
   %2 = add nuw nsw i64 %1, 1
   %min.iters.check = icmp ult i32 %0, 7
-  br i1 %min.iters.check, label %for.body.preheader21, label %vector.ph
+  br i1 %min.iters.check, label %for.body.preheader21, label %vector.ph, !consistent !10
 
 vector.ph:                                        ; preds = %for.body.preheader
   %n.vec = and i64 %2, 8589934584
@@ -108,11 +176,11 @@ vector.body:                                      ; preds = %vector.body, %vecto
   store <4 x i8> %13, ptr %14, align 1
   %index.next = add nuw i64 %index, 8
   %15 = icmp eq i64 %index.next, %n.vec
-  br i1 %15, label %middle.block, label %vector.body
+  br i1 %15, label %middle.block, label %vector.body, !consistent !10
 
 middle.block:                                     ; preds = %vector.body
   %cmp.n = icmp eq i64 %2, %n.vec
-  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader21
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader21, !consistent !10
 
 for.body.preheader21:                             ; preds = %for.body.preheader, %middle.block
   %i.012.ph = phi i32 [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
@@ -138,7 +206,7 @@ for.body:                                         ; preds = %for.body.preheader2
   %add.ptr2 = getelementptr inbounds i8, ptr %dst.addr.010, i64 1
   %inc = add nuw nsw i32 %i.012, 1
   %exitcond.not = icmp eq i32 %inc, %width
-  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10
 }
 
 define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) {
@@ -219,16 +287,94 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    cmp x11, x10
 ; CHECK-NEXT:    b.ne .LBB1_5
 ; CHECK-NEXT:    b .LBB1_7
+;
+; HBC-LABEL: loop2:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    subs w8, w2, #1
+; HBC-NEXT:    bc.lt .LBB1_7
+; HBC-NEXT:  // %bb.1: // %for.body.preheader
+; HBC-NEXT:    cmp w8, #2
+; HBC-NEXT:    bc.ls .LBB1_4
+; HBC-NEXT:  // %bb.2: // %vector.memcheck
+; HBC-NEXT:    ubfiz x9, x8, #1, #32
+; HBC-NEXT:    add x9, x9, #2
+; HBC-NEXT:    add x10, x1, x9, lsl #2
+; HBC-NEXT:    cmp x10, x0
+; HBC-NEXT:    bc.ls .LBB1_8
+; HBC-NEXT:  // %bb.3: // %vector.memcheck
+; HBC-NEXT:    add x9, x0, x9
+; HBC-NEXT:    cmp x9, x1
+; HBC-NEXT:    b.ls .LBB1_8
+; HBC-NEXT:  .LBB1_4:
+; HBC-NEXT:    mov w10, wzr
+; HBC-NEXT:    mov x8, x1
+; HBC-NEXT:    mov x9, x0
+; HBC-NEXT:  .LBB1_5: // %for.body.preheader1
+; HBC-NEXT:    movi d0, #0000000000000000
+; HBC-NEXT:    sub w10, w2, w10
+; HBC-NEXT:    mov w11, #1132396544 // =0x437f0000
+; HBC-NEXT:  .LBB1_6: // %for.body
+; HBC-NEXT:    // =>This Inner Loop Header: Depth=1
+; HBC-NEXT:    ldp s1, s3, [x8], #8
+; HBC-NEXT:    fmov s2, w11
+; HBC-NEXT:    fcmp s1, s2
+; HBC-NEXT:    fcsel s4, s2, s1, gt
+; HBC-NEXT:    fcmp s1, #0.0
+; HBC-NEXT:    fcsel s1, s0, s4, mi
+; HBC-NEXT:    fcmp s3, s2
+; HBC-NEXT:    fcsel s2, s2, s3, gt
+; HBC-NEXT:    fcmp s3, #0.0
+; HBC-NEXT:    fcvtzs w12, s1
+; HBC-NEXT:    fcsel s2, s0, s2, mi
+; HBC-NEXT:    subs w10, w10, #1
+; HBC-NEXT:    strb w12, [x9]
+; HBC-NEXT:    fcvtzs w13, s2
+; HBC-NEXT:    strb w13, [x9, #1]
+; HBC-NEXT:    add x9, x9, #2
+; HBC-NEXT:    bc.ne .LBB1_6
+; HBC-NEXT:  .LBB1_7: // %for.cond.cleanup
+; HBC-NEXT:    ret
+; HBC-NEXT:  .LBB1_8: // %vector.ph
+; HBC-NEXT:    add x11, x8, #1
+; HBC-NEXT:    mov w8, #1132396544 // =0x437f0000
+; HBC-NEXT:    and x10, x11, #0x1fffffffc
+; HBC-NEXT:    dup v0.4s, w8
+; HBC-NEXT:    add x8, x1, x10, lsl #3
+; HBC-NEXT:    add x9, x0, x10, lsl #1
+; HBC-NEXT:    mov x12, x10
+; HBC-NEXT:  .LBB1_9: // %vector.body
+; HBC-NEXT:    // =>This Inner Loop Header: Depth=1
+; HBC-NEXT:    ld2 { v1.4s, v2.4s }, [x1], #32
+; HBC-NEXT:    subs x12, x12, #4
+; HBC-NEXT:    fcmgt v3.4s, v1.4s, v0.4s
+; HBC-NEXT:    fcmgt v4.4s, v2.4s, v0.4s
+; HBC-NEXT:    fcmlt v5.4s, v1.4s, #0.0
+; HBC-NEXT:    bsl v3.16b, v0.16b, v1.16b
+; HBC-NEXT:    bsl v4.16b, v0.16b, v2.16b
+; HBC-NEXT:    fcmlt v1.4s, v2.4s, #0.0
+; HBC-NEXT:    bic v2.16b, v3.16b, v5.16b
+; HBC-NEXT:    bic v1.16b, v4.16b, v1.16b
+; HBC-NEXT:    fcvtzs v2.4s, v2.4s
+; HBC-NEXT:    fcvtzs v1.4s, v1.4s
+; HBC-NEXT:    xtn v2.4h, v2.4s
+; HBC-NEXT:    xtn v1.4h, v1.4s
+; HBC-NEXT:    trn1 v1.8b, v2.8b, v1.8b
+; HBC-NEXT:    str d1, [x0], #8
+; HBC-NEXT:    bc.ne .LBB1_9
+; HBC-NEXT:  // %bb.10: // %middle.block
+; HBC-NEXT:    cmp x11, x10
+; HBC-NEXT:    bc.ne .LBB1_5
+; HBC-NEXT:    b .LBB1_7
 entry:
   %cmp19 = icmp sgt i32 %width, 0
-  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup
+  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup, !consistent !10
 
 for.body.preheader:                               ; preds = %entry
   %0 = add i32 %width, -1
   %1 = zext i32 %0 to i64
   %2 = add nuw nsw i64 %1, 1
   %min.iters.check = icmp ult i32 %0, 3
-  br i1 %min.iters.check, label %for.body.preheader35, label %vector.memcheck
+  br i1 %min.iters.check, label %for.body.preheader35, label %vector.memcheck, !consistent !10
 
 vector.memcheck:                                  ; preds = %for.body.preheader
   %3 = add i32 %width, -1
@@ -240,7 +386,7 @@ vector.memcheck:                                  ; preds = %for.body.preheader
   %bound0 = icmp ugt ptr %scevgep24, %dst
   %bound1 = icmp ugt ptr %scevgep, %data
   %found.conflict = and i1 %bound0, %bound1
-  br i1 %found.conflict, label %for.body.preheader35, label %vector.ph
+  br i1 %found.conflict, label %for.body.preheader35, label %vector.ph, !consistent !10
 
 vector.ph:                                        ; preds = %vector.memcheck
   %n.vec = and i64 %2, 8589934588
@@ -274,11 +420,11 @@ vector.body:                                      ; preds = %vector.body, %vecto
   store <8 x i8> %interleaved.vec, ptr %21, align 1
   %index.next = add nuw i64 %index, 4
   %22 = icmp eq i64 %index.next, %n.vec
-  br i1 %22, label %middle.block, label %vector.body
+  br i1 %22, label %middle.block, label %vector.body, !consistent !10
 
 middle.block:                                     ; preds = %vector.body
   %cmp.n = icmp eq i64 %2, %n.vec
-  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader35
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader35, !consistent !10
 
 for.body.preheader35:                             ; preds = %vector.memcheck, %for.body.preheader, %middle.block
   %i.022.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
@@ -313,7 +459,7 @@ for.body:                                         ; preds = %for.body.preheader3
   %add.ptr6 = getelementptr inbounds i8, ptr %dst.addr.020, i64 2
   %inc = add nuw nsw i32 %i.022, 1
   %exitcond.not = icmp eq i32 %inc, %width
-  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10
 }
 
 define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) {
@@ -411,16 +557,111 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    b.ne .LBB2_8
 ; CHECK-NEXT:  .LBB2_9: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
+;
+; HBC-LABEL: loop3:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    subs w8, w2, #1
+; HBC-NEXT:    bc.lt .LBB2_9
+; HBC-NEXT:  // %bb.1: // %for.body.preheader
+; HBC-NEXT:    cmp w8, #2
+; HBC-NEXT:    bc.ls .LBB2_6
+; HBC-NEXT:  // %bb.2: // %vector.memcheck
+; HBC-NEXT:    add x9, x8, w8, uxtw #1
+; HBC-NEXT:    add x9, x9, #3
+; HBC-NEXT:    add x10, x1, x9, lsl #2
+; HBC-NEXT:    add x9, x0, x9
+; HBC-NEXT:    cmp x10, x0
+; HBC-NEXT:    ccmp x9, x1, #0, hi
+; HBC-NEXT:    b.hi .LBB2_6
+; HBC-NEXT:  // %bb.3: // %vector.ph
+; HBC-NEXT:    add x11, x8, #1
+; HBC-NEXT:    mov w8, #1132396544 // =0x437f0000
+; HBC-NEXT:    adrp x12, .LCPI2_0
+; HBC-NEXT:    and x10, x11, #0x1fffffffc
+; HBC-NEXT:    dup v0.4s, w8
+; HBC-NEXT:    ldr q1, [x12, :lo12:.LCPI2_0]
+; HBC-NEXT:    add x9, x10, x10, lsl #1
+; HBC-NEXT:    mov x12, x10
+; HBC-NEXT:    add x8, x1, x9, lsl #2
+; HBC-NEXT:    add x9, x0, x9
+; HBC-NEXT:  .LBB2_4: // %vector.body
+; HBC-NEXT:    // =>This Inner Loop Header: Depth=1
+; HBC-NEXT:    ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48
+; HBC-NEXT:    add x13, x0, #8
+; HBC-NEXT:    subs x12, x12, #4
+; HBC-NEXT:    fcmgt v5.4s, v2.4s, v0.4s
+; HBC-NEXT:    fcmgt v6.4s, v3.4s, v0.4s
+; HBC-NEXT:    fcmgt v7.4s, v4.4s, v0.4s
+; HBC-NEXT:    fcmlt v16.4s, v2.4s, #0.0
+; HBC-NEXT:    fcmlt v17.4s, v3.4s, #0.0
+; HBC-NEXT:    bsl v5.16b, v0.16b, v2.16b
+; HBC-NEXT:    bsl v6.16b, v0.16b, v3.16b
+; HBC-NEXT:    bsl v7.16b, v0.16b, v4.16b
+; HBC-NEXT:    fcmlt v2.4s, v4.4s, #0.0
+; HBC-NEXT:    bic v3.16b, v5.16b, v16.16b
+; HBC-NEXT:    bic v4.16b, v6.16b, v17.16b
+; HBC-NEXT:    bic v2.16b, v7.16b, v2.16b
+; HBC-NEXT:    fcvtzs v3.4s, v3.4s
+; HBC-NEXT:    fcvtzs v4.4s, v4.4s
+; HBC-NEXT:    fcvtzs v2.4s, v2.4s
+; HBC-NEXT:    xtn v5.4h, v3.4s
+; HBC-NEXT:    xtn v6.4h, v4.4s
+; HBC-NEXT:    xtn v7.4h, v2.4s
+; HBC-NEXT:    tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b
+; HBC-NEXT:    st1 { v2.s }[2], [x13]
+; HBC-NEXT:    str d2, [x0], #12
+; HBC-NEXT:    bc.ne .LBB2_4
+; HBC-NEXT:  // %bb.5: // %middle.block
+; HBC-NEXT:    cmp x11, x10
+; HBC-NEXT:    bc.ne .LBB2_7
+; HBC-NEXT:    b .LBB2_9
+; HBC-NEXT:  .LBB2_6:
+; HBC-NEXT:    mov w10, wzr
+; HBC-NEXT:    mov x8, x1
+; HBC-NEXT:    mov x9, x0
+; HBC-NEXT:  .LBB2_7: // %for.body.preheader1
+; HBC-NEXT:    movi d0, #0000000000000000
+; HBC-NEXT:    sub w10, w2, w10
+; HBC-NEXT:    mov w11, #1132396544 // =0x437f0000
+; HBC-NEXT:  .LBB2_8: // %for.body
+; HBC-NEXT:    // =>This Inner Loop Header: Depth=1
+; HBC-NEXT:    ldp s1, s3, [x8]
+; HBC-NEXT:    fmov s2, w11
+; HBC-NEXT:    fcmp s1, s2
+; HBC-NEXT:    fcsel s4, s2, s1, gt
+; HBC-NEXT:    fcmp s1, #0.0
+; HBC-NEXT:    fcsel s1, s0, s4, mi
+; HBC-NEXT:    fcmp s3, s2
+; HBC-NEXT:    fcsel s4, s2, s3, gt
+; HBC-NEXT:    fcmp s3, #0.0
+; HBC-NEXT:    ldr s3, [x8, #8]
+; HBC-NEXT:    fcvtzs w12, s1
+; HBC-NEXT:    add x8, x8, #12
+; HBC-NEXT:    fcsel s4, s0, s4, mi
+; HBC-NEXT:    fcmp s3, s2
+; HBC-NEXT:    strb w12, [x9]
+; HBC-NEXT:    fcsel s2, s2, s3, gt
+; HBC-NEXT:    fcmp s3, #0.0
+; HBC-NEXT:    fcvtzs w13, s4
+; HBC-NEXT:    fcsel s2, s0, s2, mi
+; HBC-NEXT:    subs w10, w10, #1
+; HBC-NEXT:    strb w13, [x9, #1]
+; HBC-NEXT:    fcvtzs w14, s2
+; HBC-NEXT:    strb w14, [x9, #2]
+; HBC-NEXT:    add x9, x9, #3
+; HBC-NEXT:    bc.ne .LBB2_8
+; HBC-NEXT:  .LBB2_9: // %for.cond.cleanup
+; HBC-NEXT:    ret
 entry:
   %cmp29 = icmp sgt i32 %width, 0
-  br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup
+  br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup, !consistent !10
 
 for.body.preheader:                               ; preds = %entry
   %0 = add i32 %width, -1
   %1 = zext i32 %0 to i64
   %2 = add nuw nsw i64 %1, 1
   %min.iters.check = icmp ult i32 %0, 3
-  br i1 %min.iters.check, label %for.body.preheader46, label %vector.memcheck
+  br i1 %min.iters.check, label %for.body.preheader46, label %vector.memcheck, !consistent !10
 
 vector.memcheck:                                  ; preds = %for.body.preheader
   %3 = add i32 %width, -1
@@ -432,7 +673,7 @@ vector.memcheck:                                  ; preds = %for.body.preheader
   %bound0 = icmp ugt ptr %scevgep34, %dst
   %bound1 = icmp ugt ptr %scevgep, %data
   %found.conflict = and i1 %bound0, %bound1
-  br i1 %found.conflict, label %for.body.preheader46, label %vector.ph
+  br i1 %found.conflict, label %for.body.preheader46, label %vector.ph, !consistent !10
 
 vector.ph:                                        ; preds = %vector.memcheck
   %n.vec = and i64 %2, 8589934588
@@ -474,11 +715,11 @@ vector.body:                                      ; preds = %vector.body, %vecto
   store <12 x i8> %interleaved.vec, ptr %26, align 1
   %index.next = add nuw i64 %index, 4
   %29 = icmp eq i64 %index.next, %n.vec
-  br i1 %29, label %middle.block, label %vector.body
+  br i1 %29, label %middle.block, label %vector.body, !consistent !10
 
 middle.block:                                     ; preds = %vector.body
   %cmp.n = icmp eq i64 %2, %n.vec
-  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader46
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader46, !consistent !10
 
 for.body.preheader46:                             ; preds = %vector.memcheck, %for.body.preheader, %middle.block
   %i.032.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
@@ -522,7 +763,7 @@ for.body:                                         ; preds = %for.body.preheader4
   %add.ptr10 = getelementptr inbounds i8, ptr %dst.addr.030, i64 3
   %inc = add nuw nsw i32 %i.032, 1
   %exitcond.not = icmp eq i32 %inc, %width
-  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10
 }
 
 define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) {
@@ -631,16 +872,122 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    cmp x11, x10
 ; CHECK-NEXT:    b.ne .LBB3_5
 ; CHECK-NEXT:    b .LBB3_7
+;
+; HBC-LABEL: loop4:
+; HBC:       // %bb.0: // %entry
+; HBC-NEXT:    subs w8, w2, #1
+; HBC-NEXT:    bc.lt .LBB3_7
+; HBC-NEXT:  // %bb.1: // %for.body.preheader
+; HBC-NEXT:    cmp w8, #2
+; HBC-NEXT:    bc.ls .LBB3_4
+; HBC-NEXT:  // %bb.2: // %vector.memcheck
+; HBC-NEXT:    ubfiz x9, x8, #2, #32
+; HBC-NEXT:    add x9, x9, #4
+; HBC-NEXT:    add x10, x1, x9, lsl #2
+; HBC-NEXT:    cmp x10, x0
+; HBC-NEXT:    bc.ls .LBB3_8
+; HBC-NEXT:  // %bb.3: // %vector.memcheck
+; HBC-NEXT:    add x9, x0, x9
+; HBC-NEXT:    cmp x9, x1
+; HBC-NEXT:    b.ls .LBB3_8
+; HBC-NEXT:  .LBB3_4:
+; HBC-NEXT:    mov w10, wzr
+; HBC-NEXT:    mov x8, x1
+; HBC-NEXT:    mov x9, x0
+; HBC-NEXT:  .LBB3_5: // %for.body.preheader1
+; HBC-NEXT:    movi d0, #0000000000000000
+; HBC-NEXT:    sub w10, w2, w10
+; HBC-NEXT:    mov w11, #1132396544 // =0x437f0000
+; HBC-NEXT:  .LBB3_6: // %for.body
+; HBC-NEXT:    // =>This Inner Loop Header: Depth=1
+; HBC-NEXT:    ldp s1, s3, [x8]
+; HBC-NEXT:    fmov s2, w11
+; HBC-NEXT:    fcmp s1, s2
+; HBC-NEXT:    fcsel s4, s2, s1, gt
+; HBC-NEXT:    fcmp s1, #0.0
+; HBC-NEXT:    fcsel s1, s0, s4, mi
+; HBC-NEXT:    fcmp s3, s2
+; HBC-NEXT:    fcsel s4, s2, s3, gt
+; HBC-NEXT:    fcmp s3, #0.0
+; HBC-NEXT:    ldp s3, s5, [x8, #8]
+; HBC-NEXT:    fcvtzs w12, s1
+; HBC-NEXT:    add x8, x8, #16
+; HBC-NEXT:    fcsel s4, s0, s4, mi
+; HBC-NEXT:    fcmp s3, s2
+; HBC-NEXT:    strb w12, [x9]
+; HBC-NEXT:    fcsel s6, s2, s3, gt
+; HBC-NEXT:    fcmp s3, #0.0
+; HBC-NEXT:    fcvtzs w13, s4
+; HBC-NEXT:    fcsel s3, s0, s6, mi
+; HBC-NEXT:    fcmp s5, s2
+; HBC-NEXT:    strb w13, [x9, #1]
+; HBC-NEXT:    fcsel s2, s2, s5, gt
+; HBC-NEXT:    fcmp s5, #0.0
+; HBC-NEXT:    fcvtzs w14, s3
+; HBC-NEXT:    fcsel s2, s0, s2, mi
+; HBC-NEXT:    subs w10, w10, #1
+; HBC-NEXT:    strb w14, [x9, #2]
+; HBC-NEXT:    fcvtzs w15, s2
+; HBC-NEXT:    strb w15, [x9, #3]
+; HBC-NEXT:    add x9, x9, #4
+; HBC-NEXT:    bc.ne .LBB3_6
+; HBC-NEXT:  .LBB3_7: // %for.cond.cleanup
+; HBC-NEXT:    ret
+; HBC-NEXT:  .LBB3_8: // %vector.ph
+; HBC-NEXT:    add x11, x8, #1
+; HBC-NEXT:    mov w8, #1132396544 // =0x437f0000
+; HBC-NEXT:    adrp x12, .LCPI3_0
+; HBC-NEXT:    and x10, x11, #0x1fffffffc
+; HBC-NEXT:    dup v0.4s, w8
+; HBC-NEXT:    ldr q1, [x12, :lo12:.LCPI3_0]
+; HBC-NEXT:    add x8, x1, x10, lsl #4
+; HBC-NEXT:    add x9, x0, x10, lsl #2
+; HBC-NEXT:    mov x12, x10
+; HBC-NEXT:  .LBB3_9: // %vector.body
+; HBC-NEXT:    // =>This Inner Loop Header: Depth=1
+; HBC-NEXT:    ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64
+; HBC-NEXT:    subs x12, x12, #4
+; HBC-NEXT:    fcmgt v6.4s, v2.4s, v0.4s
+; HBC-NEXT:    fcmgt v7.4s, v3.4s, v0.4s
+; HBC-NEXT:    fcmgt v16.4s, v4.4s, v0.4s
+; HBC-NEXT:    fcmgt v17.4s, v5.4s, v0.4s
+; HBC-NEXT:    fcmlt v18.4s, v2.4s, #0.0
+; HBC-NEXT:    fcmlt v19.4s, v3.4s, #0.0
+; HBC-NEXT:    fcmlt v20.4s, v4.4s, #0.0
+; HBC-NEXT:    bsl v6.16b, v0.16b, v2.16b
+; HBC-NEXT:    bsl v7.16b, v0.16b, v3.16b
+; HBC-NEXT:    bsl v16.16b, v0.16b, v4.16b
+; HBC-NEXT:    bsl v17.16b, v0.16b, v5.16b
+; HBC-NEXT:    fcmlt v2.4s, v5.4s, #0.0
+; HBC-NEXT:    bic v3.16b, v6.16b, v18.16b
+; HBC-NEXT:    bic v4.16b, v7.16b, v19.16b
+; HBC-NEXT:    bic v5.16b, v16.16b, v20.16b
+; HBC-NEXT:    bic v2.16b, v17.16b, v2.16b
+; HBC-NEXT:    fcvtzs v3.4s, v3.4s
+; HBC-NEXT:    fcvtzs v4.4s, v4.4s
+; HBC-NEXT:    fcvtzs v5.4s, v5.4s
+; HBC-NEXT:    fcvtzs v2.4s, v2.4s
+; HBC-NEXT:    xtn v16.4h, v3.4s
+; HBC-NEXT:    xtn v17.4h, v4.4s
+; HBC-NEXT:    xtn v18.4h, v5.4s
+; HBC-NEXT:    xtn v19.4h, v2.4s
+; HBC-NEXT:    tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; HBC-NEXT:    str q2, [x0], #16
+; HBC-NEXT:    bc.ne .LBB3_9
+; HBC-NEXT:  // %bb.10: // %middle.block
+; HBC-NEXT:    cmp x11, x10
+; HBC-NEXT:    bc.ne .LBB3_5
+; HBC-NEXT:    b .LBB3_7
 entry:
   %cmp39 = icmp sgt i32 %width, 0
-  br i1 %cmp39, label %for.body.preheader, label %for.cond.cleanup
+  br i1 %cmp39, label %for.body.preheader, label %for.cond.cleanup, !consistent !10
 
 for.body.preheader:                               ; preds = %entry
   %0 = add i32 %width, -1
   %1 = zext i32 %0 to i64
   %2 = add nuw nsw i64 %1, 1
   %min.iters.check = icmp ult i32 %0, 3
-  br i1 %min.iters.check, label %for.body.preheader57, label %vector.memcheck
+  br i1 %min.iters.check, label %for.body.preheader57, label %vector.memcheck, !consistent !10
 
 vector.memcheck:                                  ; preds = %for.body.preheader
   %3 = add i32 %width, -1
@@ -652,7 +999,7 @@ vector.memcheck:                                  ; preds = %for.body.preheader
   %bound0 = icmp ugt ptr %scevgep44, %dst
   %bound1 = icmp ugt ptr %scevgep, %data
   %found.conflict = and i1 %bound0, %bound1
-  br i1 %found.conflict, label %for.body.preheader57, label %vector.ph
+  br i1 %found.conflict, label %for.body.preheader57, label %vector.ph, !consistent !10
 
 vector.ph:                                        ; preds = %vector.memcheck
   %n.vec = and i64 %2, 8589934588
@@ -700,11 +1047,11 @@ vector.body:                                      ; preds = %vector.body, %vecto
   store <16 x i8> %interleaved.vec, ptr %31, align 1
   %index.next = add nuw i64 %index, 4
   %34 = icmp eq i64 %index.next, %n.vec
-  br i1 %34, label %middle.block, label %vector.body
+  br i1 %34, label %middle.block, label %vector.body, !consistent !10
 
 middle.block:                                     ; preds = %vector.body
   %cmp.n = icmp eq i64 %2, %n.vec
-  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader57
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader57, !consistent !10
 
 for.body.preheader57:                             ; preds = %vector.memcheck, %for.body.preheader, %middle.block
   %i.042.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
@@ -757,5 +1104,7 @@ for.body:                                         ; preds = %for.body.preheader5
   %add.ptr14 = getelementptr inbounds i8, ptr %dst.addr.040, i64 4
   %inc = add nuw nsw i32 %i.042, 1
   %exitcond.not = icmp eq i32 %inc, %width
-  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !consistent !10
 }
+
+!10 = !{i1 true}



More information about the cfe-commits mailing list