[llvm] [DecoderEmitter] Support for DecodeOrder and `resolve-conflicts-try-all` (PR #157948)

Thu Sep 11 11:31:26 PDT 2025

https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/157948

>From 7affc5b4ef78843ac8509bc8ece1ea27d762c1c1 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 10 Sep 2025 12:15:57 -0700
Subject: [PATCH 1/5] [DecoderEmitter] Add support for DecodeOrder and
 `resolve-conflicts-try-all`

---
 llvm/include/llvm/Target/Target.td            |   4 +
 .../lib/Target/AArch64/AArch64InstrFormats.td |   7 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   4 +-
 .../Disassembler/AArch64Disassembler.cpp      |  62 +++----
 .../Disassembler/AMDGPUDisassembler.cpp       |  43 +----
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h  |   4 -
 llvm/lib/Target/AMDGPU/VINTERPInstructions.td |  10 +-
 llvm/lib/Target/AMDGPU/VOP1Instructions.td    |  12 +-
 llvm/lib/Target/AMDGPU/VOP2Instructions.td    |  12 +-
 llvm/lib/Target/AMDGPU/VOPCInstructions.td    |  11 +-
 llvm/lib/Target/AMDGPU/VOPInstructions.td     |  27 +--
 .../RISCV/Disassembler/RISCVDisassembler.cpp  |  22 ++-
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |  14 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoC.td      |   6 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  40 ++---
 llvm/lib/Target/RISCV/RISCVInstrInfoZa.td     |   2 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZc.td     |  12 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZclsd.td  |   5 +-
 .../lib/Target/RISCV/RISCVInstrInfoZicfiss.td |   2 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td  |   4 +-
 llvm/utils/TableGen/DecoderEmitter.cpp        | 164 ++++++++++++++++--
 21 files changed, 304 insertions(+), 163 deletions(-)

diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 6a7ecf78b2131..c24083ec51fee 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -551,6 +551,10 @@ class InstructionEncoding {
   // where multiple ISA namespaces exist).
   string DecoderNamespace = "";
 
+  // Within a namespace, DecodeOrder is used to order instructions when we need
+  // to attempt multiple encodings.
+  int DecodeOrder = 0;
+
   // List of predicates which will be turned into isel matching code.
   list<Predicate> Predicates = [];
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 8958ad129269c..9d3286b7f2cd8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1957,7 +1957,7 @@ class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
                        "mrs", "\t$Rt, $systemreg"> {
   bits<16> systemreg;
   let Inst{20-5} = systemreg;
-  let DecoderNamespace = "Fallback";
+  let DecodeOrder = 1;
   // The MRS is set as a NZCV setting instruction. Not all MRS instructions
   // require doing this. The alternative was to explicitly model each one, but
   // it feels like it is unnecessary because it seems there are no negative
@@ -1972,7 +1972,7 @@ class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
                        "msr", "\t$systemreg, $Rt"> {
   bits<16> systemreg;
   let Inst{20-5} = systemreg;
-  let DecoderNamespace = "Fallback";
+  let DecodeOrder = 1;
 }
 
 def SystemPStateFieldWithImm0_15Operand : AsmOperandClass {
@@ -2045,7 +2045,8 @@ class MSRpstateImm0_1
   // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
   // Fail the decoder should attempt to decode the instruction as MSRI.
   let hasCompleteDecoder = false;
-  let DecoderNamespace = "Fallback";
+  let DecodeOrder = 1;
+
 }
 
 // SYS and SYSL generic system instructions.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f0020a9a3c91d..fce95c28f22b6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -10712,8 +10712,8 @@ def RPRFM:
   let hasSideEffects = 1;
   // RPRFM overlaps with PRFM (reg), when the decoder method of PRFM returns
   // Fail, the decoder should attempt to decode RPRFM. This requires setting
-  // the decoder namespace to "Fallback".
-  let DecoderNamespace = "Fallback";
+  // the decode order for RPRFM to 1 (greater than the decode order of PRFM).
+  let DecodeOrder = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index aa1c1c882e225..9f55c7a7e7bfd 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1578,43 +1578,37 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
   uint32_t Insn =
       (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
 
-  const uint8_t *Tables[] = {DecoderTable32, DecoderTableFallback32};
-
-  for (const auto *Table : Tables) {
-    DecodeStatus Result =
-        decodeInstruction(Table, MI, Insn, Address, this, STI);
-
-    const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
-
-    // For Scalable Matrix Extension (SME) instructions that have an implicit
-    // operand for the accumulator (ZA) or implicit immediate zero which isn't
-    // encoded, manually insert operand.
-    for (unsigned i = 0; i < Desc.getNumOperands(); i++) {
-      if (Desc.operands()[i].OperandType == MCOI::OPERAND_REGISTER) {
-        switch (Desc.operands()[i].RegClass) {
-        default:
-          break;
-        case AArch64::MPRRegClassID:
-          MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZA));
-          break;
-        case AArch64::MPR8RegClassID:
-          MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0));
-          break;
-        case AArch64::ZTRRegClassID:
-          MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZT0));
-          break;
-        }
-      } else if (Desc.operands()[i].OperandType ==
-                 AArch64::OPERAND_IMPLICIT_IMM_0) {
-        MI.insert(MI.begin() + i, MCOperand::createImm(0));
+  DecodeStatus Result =
+      decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
+  if (Result != Success)
+    return Result;
+
+  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+
+  // For Scalable Matrix Extension (SME) instructions that have an implicit
+  // operand for the accumulator (ZA) or implicit immediate zero which isn't
+  // encoded, manually insert operand.
+  for (unsigned i = 0; i < Desc.getNumOperands(); i++) {
+    if (Desc.operands()[i].OperandType == MCOI::OPERAND_REGISTER) {
+      switch (Desc.operands()[i].RegClass) {
+      default:
+        break;
+      case AArch64::MPRRegClassID:
+        MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZA));
+        break;
+      case AArch64::MPR8RegClassID:
+        MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0));
+        break;
+      case AArch64::ZTRRegClassID:
+        MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZT0));
+        break;
       }
+    } else if (Desc.operands()[i].OperandType ==
+               AArch64::OPERAND_IMPLICIT_IMM_0) {
+      MI.insert(MI.begin() + i, MCOperand::createImm(0));
     }
-
-    if (Result != MCDisassembler::Fail)
-      return Result;
   }
-
-  return MCDisassembler::Fail;
+  return Success;
 }
 
 uint64_t AArch64Disassembler::suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 6f6039bf4ec21..28467b44c711d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -487,18 +487,6 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
   return MCDisassembler::Fail;
 }
 
-template <typename InsnType>
-DecodeStatus
-AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
-                                  MCInst &MI, InsnType Inst, uint64_t Address,
-                                  raw_ostream &Comments) const {
-  for (const uint8_t *T : {Table1, Table2}) {
-    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
-      return Res;
-  }
-  return MCDisassembler::Fail;
-}
-
 template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
   assert(Bytes.size() >= sizeof(T));
   const auto Res =
@@ -617,18 +605,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       std::bitset<96> DecW = eat12Bytes(Bytes);
 
       if (isGFX11() &&
-          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
-                        DecW, Address, CS))
+          tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS))
         break;
 
       if (isGFX1250() &&
-          tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
-                        DecW, Address, CS))
+          tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
         break;
 
       if (isGFX12() &&
-          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
-                        DecW, Address, CS))
+          tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS))
         break;
 
       if (isGFX12() &&
@@ -698,18 +683,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
         break;
 
       if (isGFX1250() &&
-          tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
-                        QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX125064, MI, QW, Address, CS))
         break;
 
-      if (isGFX12() &&
-          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
-                        Address, CS))
+      if (isGFX12() && tryDecodeInst(DecoderTableGFX1264, MI, QW, Address, CS))
         break;
 
-      if (isGFX11() &&
-          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
-                        Address, CS))
+      if (isGFX11() && tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS))
         break;
 
       if (isGFX11() &&
@@ -753,19 +733,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
         break;
 
-      if (isGFX11() &&
-          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
-                        Address, CS))
+      if (isGFX11() && tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS))
         break;
 
       if (isGFX1250() &&
-          tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
-                        DW, Address, CS))
+          tryDecodeInst(DecoderTableGFX125032, MI, DW, Address, CS))
         break;
 
-      if (isGFX12() &&
-          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
-                        Address, CS))
+      if (isGFX12() && tryDecodeInst(DecoderTableGFX1232, MI, DW, Address, CS))
         break;
     }
 
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index c1131c2936fc7..b33a8bfa7db6d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -79,10 +79,6 @@ class AMDGPUDisassembler : public MCDisassembler {
   template <typename InsnType>
   DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst,
                              uint64_t Address, raw_ostream &Comments) const;
-  template <typename InsnType>
-  DecodeStatus tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
-                             MCInst &MI, InsnType Inst, uint64_t Address,
-                             raw_ostream &Comments) const;
 
   Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                                ArrayRef<uint8_t> Bytes,
diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
index 5e89e34ca56e9..e7cc1b9b255ac 100644
--- a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
@@ -239,8 +239,9 @@ defm : VInterpF16Pat<int_amdgcn_interp_p2_rtz_f16,
 
 multiclass VINTERP_Real_gfx11 <bits<7> op, string asmName> {
   defvar ps = !cast<VOP3_Pseudo>(NAME);
-  let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" #
-                           !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
+  let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11",
+      // When decoding, attempt decoding IsRealTrue16 first, then the fake one.
+      DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1) in {
     def _gfx11 :
       VINTERP_Real<ps, SIEncodingFamily.GFX11, asmName>,
       VINTERPe_gfx11<op, ps.Pfl>;
@@ -249,8 +250,9 @@ multiclass VINTERP_Real_gfx11 <bits<7> op, string asmName> {
 
 multiclass VINTERP_Real_gfx12 <bits<7> op, string asmName> {
   defvar ps = !cast<VOP3_Pseudo>(NAME);
-  let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12" #
-                           !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
+  let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12",
+      // When decoding, attempt decoding IsRealTrue16 first, then the fake one.
+      DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1) in {
     def _gfx12 :
       VINTERP_Real<ps, SIEncodingFamily.GFX12, asmName>,
       VINTERPe_gfx12<op, ps.Pfl>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index f816d7de27ee4..0ce794a7c6c76 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -940,8 +940,8 @@ multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
                                    string asmName> {
   defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
   let AsmString = asmName # ps.AsmOperands,
-      DecoderNamespace = Gen.DecoderNamespace #
-                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
+      DecoderNamespace = Gen.DecoderNamespace,
+      DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1) in {
     defm NAME : VOP1_Real_e32<Gen, op, opName>;
   }
 }
@@ -961,8 +961,8 @@ multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
                                    string asmName> {
   defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
   let AsmString = asmName # ps.Pfl.AsmDPP16,
-      DecoderNamespace = Gen.DecoderNamespace #
-                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
+      DecoderNamespace = Gen.DecoderNamespace,
+      DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1) in {
     defm NAME : VOP1_Real_dpp<Gen, op, opName>;
   }
 }
@@ -977,8 +977,8 @@ multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
                                     string asmName> {
   defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
   let AsmString = asmName # ps.Pfl.AsmDPP8,
-      DecoderNamespace = Gen.DecoderNamespace #
-                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
+      DecoderNamespace = Gen.DecoderNamespace,
+      DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1) in {
     if !not(ps.Pfl.HasExt64BitDPP) then
       defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
   }
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index cff66aaedb11e..b698b2f24e2f2 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -127,8 +127,8 @@ class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic>
   VOP2_Real <ps, Gen.Subtarget, real_name> {
   let AssemblerPredicate = Gen.AssemblerPredicate;
   let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
-  let DecoderNamespace = Gen.DecoderNamespace#
-                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+  let DecoderNamespace = Gen.DecoderNamespace;
+  let DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1);
 }
 
 class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
@@ -1517,8 +1517,8 @@ class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
     VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
   let AssemblerPredicate = Gen.AssemblerPredicate;
   let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
-  let DecoderNamespace = Gen.DecoderNamespace#
-                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+  let DecoderNamespace = Gen.DecoderNamespace;
+  let DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1);
 }
 
 class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
@@ -1547,8 +1547,8 @@ class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
     VOP2_DPP8<op, ps, p> {
   let AssemblerPredicate = Gen.AssemblerPredicate;
   let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
-  let DecoderNamespace = Gen.DecoderNamespace#
-                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+  let DecoderNamespace = Gen.DecoderNamespace;
+  let DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 2730ec52294e9..82f2fc7df3006 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -310,6 +310,7 @@ class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst,
   let SubtargetPredicate = AssemblerPredicate;
 
   string DecoderNamespace; // dummy
+  int DecodeOrder; // dummy
 }
 
 multiclass VOPCInstAliases <string old_name, string Arch, string real_name = old_name, string mnemonic_from = real_name> {
@@ -1677,7 +1678,8 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
                                                      pseudo_mnemonic),
                               asm_name, ps64.AsmVariantName>;
 
-    let DecoderNamespace = Gen.DecoderNamespace # !if(ps32.Pfl.IsRealTrue16, "", "_FAKE16") in {
+    let DecoderNamespace = Gen.DecoderNamespace,
+        DecodeOrder = !if(ps32.Pfl.IsRealTrue16, 0, 1) in {
       def _e32#Gen.Suffix :
         // 32 and 64 bit forms of the instruction have _e32 and _e64
         // respectively appended to their assembly mnemonic.
@@ -1753,7 +1755,7 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
           def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
         }
       } // end if ps64.Pfl.HasExtVOP3DPP
-    } // End DecoderNamespace
+    } // End DecodeOrder
   } // End AssemblerPredicate
 }
 
@@ -1824,7 +1826,8 @@ multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
                                                      pseudo_mnemonic),
                               asm_name, ps64.AsmVariantName>;
 
-    let DecoderNamespace = Gen.DecoderNamespace # !if(ps32.Pfl.IsRealTrue16, "", "_FAKE16") in {
+    let DecoderNamespace = Gen.DecoderNamespace,
+        DecodeOrder = !if(ps32.Pfl.IsRealTrue16, 0, 1) in {
       def _e32#Gen.Suffix
           : VOPC_Real<ps32, Gen.Subtarget, asm_name>,
             VOPCe<op{7-0}> {
@@ -1880,7 +1883,7 @@ multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
           }
         }
       } // End if ps64.Pfl.HasExtVOP3DPP
-    } // End DecoderNamespace
+    } // End DecodeOrder
   } // End AssemblerPredicate
 }
 
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 5550a0c08b918..edc03358c0652 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -204,8 +204,8 @@ class VOP3_Real_Gen <VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> :
   VOP3_Real <ps, Gen.Subtarget, asm_name> {
   let AssemblerPredicate = Gen.AssemblerPredicate;
   let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
-  let DecoderNamespace = Gen.DecoderNamespace#
-                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+  let DecoderNamespace = Gen.DecoderNamespace;
+  let DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1);
 }
 
 // XXX - Is there any reason to distinguish this from regular VOP3
@@ -1705,8 +1705,8 @@ class VOP3_DPP16_Gen_t16<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
   let True16Predicate =
       !if (ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
   let AssemblerPredicate = Gen.AssemblerPredicate;
-  let DecoderNamespace =
-      Gen.DecoderNamespace #!if (ps.Pfl.IsRealTrue16, "", "_FAKE16");
+  let DecoderNamespace = Gen.DecoderNamespace;
+  let DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1);
 }
 
 class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
@@ -1816,7 +1816,8 @@ multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string asmName, string opN
                              bit isSingle = 0> {
   defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
   let AsmString = asmName # ps.AsmOperands,
-      DecoderNamespace = Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
+      DecoderNamespace = Gen.DecoderNamespace,
+      DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1),
       IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
     def _e64#Gen.Suffix :
       VOP3_Real_Gen<ps, Gen>,
@@ -1886,8 +1887,8 @@ multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string asmName, string
   def _e64_dpp#Gen.Suffix :
     VOP3_DPP16_Gen_t16<op, ps, Gen> {
       let AsmString = asmName # ps.Pfl.AsmVOP3DPP16;
-      let DecoderNamespace = Gen.DecoderNamespace
-                             # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+      let DecoderNamespace = Gen.DecoderNamespace;
+      let DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1);
       let Inst{11} = ?;
       let Inst{12} = ?;
     }
@@ -1915,8 +1916,8 @@ multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName, strin
     let Inst{11} = ?;
     let Inst{12} = ?;
     let AsmString = asmName # ps.Pfl.AsmVOP3DPP8;
-    let DecoderNamespace = Gen.DecoderNamespace
-                           # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+    let DecoderNamespace = Gen.DecoderNamespace;
+    let DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1);
     let AssemblerPredicate = Gen.AssemblerPredicate;
   }
 }
@@ -1925,8 +1926,8 @@ multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName,
                                     string asmName> {
   defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
   let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
-      DecoderNamespace = Gen.DecoderNamespace#
-                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
+      DecoderNamespace = Gen.DecoderNamespace,
+      DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1),
       True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
                             NoTrue16Predicate) in {
     defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>;
@@ -2006,8 +2007,8 @@ multiclass VOP3_BITOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string asmName> {
 multiclass VOP3_BITOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName> {
   defvar ps = !cast<VOP3_Pseudo>(NAME#"_e64");
   def _e64_dpp8#Gen.Suffix : VOP3_BITOP3_DPP8<op, ps, asmName> {
-    let DecoderNamespace =
-      Gen.DecoderNamespace #!if (ps.Pfl.IsRealTrue16, "", "_FAKE16");
+    let DecoderNamespace = Gen.DecoderNamespace;
+    let DecodeOrder = !if(ps.Pfl.IsRealTrue16, 0, 1);
     let AssemblerPredicate = Gen.AssemblerPredicate;
   }
 }
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 89df9d82f8780..6bfa941c2d13e 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -689,9 +689,14 @@ static constexpr DecoderListEntry DecoderList32[]{
     {DecoderTableXMIPS32, XMIPSGroup, "Mips extensions"},
     {DecoderTableXAndes32, XAndesGroup, "Andes extensions"},
     {DecoderTableXSMT32, XSMTGroup, "SpacemiT extensions"},
-    // Standard Extensions
+
+    // Standard Extensions.
+    // The decode order within this table is as follows:
+    // standard 32-bit instructions : 0
+    // RV32-only standard 32-bit instructions : 1
+    // Zfinx (Float in Integer) : 2 (TBD)
+    // RV32-only Zdinx (Double in Integer) : 3 (TBD)
     {DecoderTable32, {}, "standard 32-bit instructions"},
-    {DecoderTableRV32Only32, {}, "RV32-only standard 32-bit instructions"},
     {DecoderTableZfinx32, {}, "Zfinx (Float in Integer)"},
     {DecoderTableZdinxRV32Only32, {}, "RV32-only Zdinx (Double in Integer)"},
 };
@@ -739,15 +744,16 @@ static constexpr DecoderListEntry DecoderList16[]{
      {RISCV::FeatureVendorXqccmp},
      "Xqccmp (Qualcomm 16-bit Push/Pop & Double Move Instructions)"},
     {DecoderTableXwchc16, {RISCV::FeatureVendorXwchc}, "WCH QingKe XW"},
+
     // Standard Extensions
+    // Instructions in this table have the following decoding order:
+    // Zicfiss (Shadow Stack 16-bit) : -1
+    // standard 16-bit instructions : 0
+    // RV32-only 16-bit instructions : 1
+    // ZcOverlap (16-bit Instructions overlapping with Zcf/Zcd): 2
+
     // DecoderTableZicfiss16 must be checked before DecoderTable16.
-    {DecoderTableZicfiss16, {}, "Zicfiss (Shadow Stack 16-bit)"},
     {DecoderTable16, {}, "standard 16-bit instructions"},
-    {DecoderTableRV32Only16, {}, "RV32-only 16-bit instructions"},
-    // Zc* instructions incompatible with Zcf or Zcd
-    {DecoderTableZcOverlap16,
-     {},
-     "ZcOverlap (16-bit Instructions overlapping with Zcf/Zcd)"},
 };
 
 DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 92552b36aa0b9..5c68e37eb552c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -2312,6 +2312,20 @@ def : Pat<(i64 (add GPR:$rs1, negImm:$rs2)), (SUB GPR:$rs1, negImm:$rs2)>;
 let Predicates = [HasStdExtZihintpause] in
 def : Pat<(int_riscv_pause), (FENCE 0x1, 0x0)>;
 
+//===----------------------------------------------------------------------===//
+// Decode orders for various instruction classes.
+//===----------------------------------------------------------------------===//
+
+// For 16-bit instructions.
+defvar DecodeOrderZicfiss16 = -1;
+defvar DecodeOrderRV32Only16 = 1;
+defvar DecodeOrderZcOverlap = 2;
+
+// For 32-bit instructions.
+defvar DecodeOrderRV32Only32 = 1;
+defvar DecodeOrderZfinx32 = 2;
+defvar DecodeOrderZdinxRV32Only32 = 2;
+
 //===----------------------------------------------------------------------===//
 // Standard extensions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 9fc73662d9704..738accb00fb99 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -379,7 +379,7 @@ def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, simm6:$imm),
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
-    DecoderNamespace = "RV32Only", Defs = [X1],
+    DecodeOrder = DecodeOrderRV32Only16, Defs = [X1],
     Predicates = [HasStdExtZca, IsRV32]  in
 def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins bare_simm12_lsb0:$offset),
                        "c.jal", "$offset">, Sched<[WriteJal]>;
@@ -542,7 +542,7 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>,
 
 } // Predicates = [HasStdExtZca]
 
-let DecoderNamespace = "RV32Only",
+let DecodeOrder = DecodeOrderRV32Only16,
     Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
   def C_FLW  : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
                Sched<[WriteFLD32, ReadFMemBase]> {
@@ -569,7 +569,7 @@ let DecoderNamespace = "RV32Only",
                  Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
     let Inst{8-7}  = imm{7-6};
   }
-} // DecoderNamespace = "RV32Only", Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
+} // DecodeOrder = DecodeOrderRV32Only16
 
 let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
   def C_FLD  : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index cfa20cb016918..e4a14a0d71458 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -382,9 +382,9 @@ let Predicates = [HasStdExtP] in {
   def PSLLI_H  : RVPShiftH_ri<0b000, 0b010, "pslli.h">;
   def PSSLAI_H : RVPShiftH_ri<0b101, 0b010, "psslai.h">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def SSLAI    : RVPShiftW_ri<0b101, 0b010, "sslai">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 let Predicates = [HasStdExtP, IsRV64] in {
   def PSLLI_W  : RVPShiftW_ri<0b000, 0b010, "pslli.w">;
   def PSSLAI_W : RVPShiftW_ri<0b101, 0b010, "psslai.w">;
@@ -431,7 +431,7 @@ let Predicates = [HasStdExtP] in {
 
   def PSSHAR_HS : RVPBinaryScalar_rr<0b111, 0b00, 0b010, "psshar.hs">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def SSHA      : RVPBinaryScalar_rr<0b110, 0b01, 0b010, "ssha">;
 
   def SSHAR     : RVPBinaryScalar_rr<0b111, 0b01, 0b010, "sshar">;
@@ -461,7 +461,7 @@ let Predicates = [HasStdExtP] in {
 
   def PSATI_H    : RVPShiftH_ri<0b110, 0b100, "psati.h">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def USATI_RV32 : RVPShiftW_ri<0b010, 0b100, "usati">;
 
   def SRARI_RV32 : RVPShiftW_ri<0b101, 0b100, "srari">;
@@ -542,7 +542,7 @@ let Predicates = [HasStdExtP] in {
   def PASUBU_H : RVPBinary_rr<0b1111, 0b00, 0b000, "pasubu.h">;
   def PASUBU_B : RVPBinary_rr<0b1111, 0b10, 0b000, "pasubu.b">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def SADD     : RVPBinary_rr<0b0010, 0b01, 0b000, "sadd">;
 
   def AADD     : RVPBinary_rr<0b0011, 0b01, 0b000, "aadd">;
@@ -558,7 +558,7 @@ let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
   def SSUBU    : RVPBinary_rr<0b1110, 0b01, 0b000, "ssubu">;
 
   def ASUBU    : RVPBinary_rr<0b1111, 0b01, 0b000, "asubu">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 let Predicates = [HasStdExtP, IsRV64] in {
   def PADD_W   : RVPBinary_rr<0b0000, 0b01, 0b000, "padd.w">;
 
@@ -596,7 +596,7 @@ let Predicates = [HasStdExtP] in {
 
   def PDIFSUMAU_B  : RVPTernary_rrr<0b0111, 0b10, 0b001, "pdifsumau.b">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def MUL_H01      : RVPBinary_rr<0b0010, 0b01, 0b001, "mul.h01">;
 
   def MACC_H01     : RVPTernary_rrr<0b0011, 0b01, 0b001, "macc.h01">;
@@ -604,7 +604,7 @@ let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
   def MULU_H01     : RVPBinary_rr<0b0110, 0b01, 0b001, "mulu.h01">;
 
   def MACCU_H01    : RVPTernary_rrr<0b0111, 0b01, 0b001, "maccu.h01">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 let Predicates = [HasStdExtP, IsRV64] in {
   def PMUL_W_H01   : RVPBinary_rr<0b0010, 0b01, 0b001, "pmul.w.h01">;
   def MUL_W01      : RVPBinary_rr<0b0010, 0b11, 0b001, "mul.w01">;
@@ -626,9 +626,9 @@ let Predicates = [HasStdExtP] in {
 
   def PSSH1SADD_H : RVPBinary_rr<0b0110, 0b00, 0b010, "pssh1sadd.h">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def SSH1SADD    : RVPBinary_rr<0b0110, 0b01, 0b010, "ssh1sadd">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 let Predicates = [HasStdExtP, IsRV64] in {
   def PSH1ADD_W   : RVPBinary_rr<0b0100, 0b01, 0b010, "psh1add.w">;
 
@@ -658,7 +658,7 @@ let Predicates = [HasStdExtP] in {
 
   def PMULSU_H_B11  : RVPBinary_rr<0b1110, 0b00, 0b011, "pmulsu.h.b11">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def MUL_H00       : RVPBinary_rr<0b0000, 0b01, 0b011, "mul.h00">;
 
   def MACC_H00      : RVPTernary_rrr<0b0001, 0b01, 0b011, "macc.h00">;
@@ -682,7 +682,7 @@ let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
   def MULSU_H11     : RVPBinary_rr<0b1110, 0b01, 0b011, "mulsu.h11">;
 
   def MACCSU_H11    : RVPTernary_rrr<0b1111, 0b01, 0b011, "maccsu.h11">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 let Predicates = [HasStdExtP, IsRV64] in {
   def PMUL_W_H00    : RVPBinary_rr<0b0000, 0b01, 0b011, "pmul.w.h00">;
   def MUL_W00       : RVPBinary_rr<0b0000, 0b11, 0b011, "mul.w00">;
@@ -732,13 +732,13 @@ let Predicates = [HasStdExtP] in {
 
   def PPACKT_H    : RVPBinary_rr<0b0110, 0b00, 0b100, "ppackt.h">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def PACKBT_RV32 : RVPBinary_rr<0b0010, 0b01, 0b100, "packbt">;
 
   def PACKTB_RV32 : RVPBinary_rr<0b0100, 0b01, 0b100, "packtb">;
 
   def PACKT_RV32  : RVPBinary_rr<0b0110, 0b01, 0b100, "packt">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 let Predicates = [HasStdExtP, IsRV64] in {
   def PPACK_W     : RVPBinary_rr<0b0000, 0b01, 0b100, "ppack.w">;
 
@@ -791,10 +791,10 @@ let Predicates = [HasStdExtP] in {
   def PM2ADDASU_H : RVPBinary_rr<0b1101, 0b00, 0b101, "pm2addasu.h">;
   def PM4ADDASU_B : RVPBinary_rr<0b1101, 0b10, 0b101, "pm4addasu.b">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def MQACC_H01  : RVPTernary_rrr<0b1111, 0b00, 0b101, "mqacc.h01">;
   def MQRACC_H01 : RVPTernary_rrr<0b1111, 0b10, 0b101, "mqracc.h01">;
-} // // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 let Predicates = [HasStdExtP, IsRV64] in {
   def PM2ADD_W      : RVPBinary_rr<0b0000, 0b01, 0b101, "pm2add.w">;
   def PM4ADD_H      : RVPBinary_rr<0b0000, 0b11, 0b101, "pm4add.h">;
@@ -870,13 +870,13 @@ let Predicates = [HasStdExtP] in {
   def PMAXU_H  : RVPBinary_rr<0b1111, 0b00, 0b110, "pmaxu.h">;
   def PMAXU_B  : RVPBinary_rr<0b1111, 0b10, 0b110, "pmaxu.b">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def MSEQ  : RVPBinary_rr<0b1000, 0b01, 0b110, "mseq">;
 
   def MSLT  : RVPBinary_rr<0b1010, 0b01, 0b110, "mslt">;
 
   def MSLTU : RVPBinary_rr<0b1011, 0b01, 0b110, "msltu">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 let Predicates = [HasStdExtP, IsRV64] in {
   def PAS_WX   : RVPBinary_rr<0b0000, 0b01, 0b110, "pas.wx">;
   def PSA_WX   : RVPBinary_rr<0b0000, 0b11, 0b110, "psa.wx">;
@@ -936,7 +936,7 @@ let Predicates = [HasStdExtP] in {
   def PMULQ_H       : RVPBinary_rr<0b1010, 0b00, 0b111, "pmulq.h">;
   def PMULQR_H      : RVPBinary_rr<0b1010, 0b10, 0b111, "pmulqr.h">;
 } // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
   def MULHR      : RVPBinary_rr<0b0000, 0b11, 0b111, "mulhr">;
 
   def MHACC      : RVPTernary_rrr<0b0001, 0b01, 0b111, "mhacc">;
@@ -972,7 +972,7 @@ let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
 
   def MQACC_H11  : RVPTernary_rrr<0b1111, 0b00, 0b111, "mqacc.h11">;
   def MQRACC_H11 : RVPTernary_rrr<0b1111, 0b10, 0b111, "mqracc.h11">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in
+} // Predicates = [HasStdExtP, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in
 let Predicates = [HasStdExtP, IsRV64] in {
   def PMULH_W       : RVPBinary_rr<0b0000, 0b01, 0b111, "pmulh.w">;
   def PMULHR_W      : RVPBinary_rr<0b0000, 0b11, 0b111, "pmulhr.w">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
index 7cf6d5ff762ff..5affd1aa47d4a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
@@ -59,7 +59,7 @@ let Predicates = [HasStdExtZacas], IsSignExtendingOpW = 1 in {
 defm AMOCAS_W : AMO_cas_aq_rl<0b00101, 0b010, "amocas.w", GPR>;
 } // Predicates = [HasStdExtZacas]
 
-let Predicates = [HasStdExtZacas, IsRV32], DecoderNamespace = "RV32Only"  in {
+let Predicates = [HasStdExtZacas, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
 defm AMOCAS_D_RV32 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPRPairRV32>;
 } // Predicates = [HasStdExtZacas, IsRV32]
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
index ed1a60aa49cab..6b80b552b6e53 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -216,7 +216,7 @@ def C_SH_INX : CStoreH_rri<0b100011, 0b0, "c.sh", GPRF16C>,
 } // Predicates = [HasStdExtZcb]
 
 // Zcmp
-let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp],
+let DecodeOrder = DecodeOrderZcOverlap, Predicates = [HasStdExtZcmp],
     hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
 let Defs = [X10, X11] in
 def CM_MVA01S : RVInst16CA<0b101011, 0b11, 0b10, (outs),
@@ -227,9 +227,9 @@ let Uses = [X10, X11] in
 def CM_MVSA01 : RVInst16CA<0b101011, 0b01, 0b10, (outs SR07:$rs1, SR07:$rs2),
                             (ins), "cm.mvsa01", "$rs1, $rs2">,
                 Sched<[WriteIALU, WriteIALU, ReadIALU, ReadIALU]>;
-} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp]...
+} // DecodeOrder = DecodeOrderZcOverlap, Predicates = [HasStdExtZcmp]...
 
-let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp] in {
+let DecodeOrder = DecodeOrderZcOverlap, Predicates = [HasStdExtZcmp] in {
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Uses = [X2], Defs = [X2] in
 def CM_PUSH : RVInstZcCPPP<0b11000, "cm.push", negstackadj>,
               Sched<[WriteIALU, ReadIALU, ReadStoreData, ReadStoreData,
@@ -258,9 +258,9 @@ def CM_POP : RVInstZcCPPP<0b11010, "cm.pop">,
              Sched<[WriteIALU, WriteLDW, WriteLDW, WriteLDW, WriteLDW,
                     WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW,
                     WriteLDW, WriteLDW, WriteLDW, ReadIALU]>;
-} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp]...
+} // DecodeOrder = DecodeOrderZcOverlap, Predicates = [HasStdExtZcmp]...
 
-let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmt],
+let DecodeOrder = DecodeOrderZcOverlap, Predicates = [HasStdExtZcmt],
     hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
 def CM_JT : RVInst16CJ<0b101, 0b10, (outs), (ins uimm5:$index),
                        "cm.jt", "$index">{
@@ -278,7 +278,7 @@ def CM_JALT : RVInst16CJ<0b101, 0b10, (outs), (ins uimm8ge32:$index),
   let Inst{12-10} = 0b000;
   let Inst{9-2} = index;
 }
-} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmt]...
+} // DecodeOrder = DecodeOrderZcOverlap, Predicates = [HasStdExtZcmt]...
 
 
 let Predicates = [HasStdExtZcb, HasStdExtZmmul] in{
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZclsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZclsd.td
index 962ad3c5a3151..1e6ffc8fe0488 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZclsd.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZclsd.td
@@ -41,7 +41,8 @@ def GPRPairCRV32 : RegisterOperand<GPRPairC> {
 // Instructions
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtZclsd, IsRV32], DecoderNamespace = "ZcOverlap" in {
+let Predicates = [HasStdExtZclsd, IsRV32],
+    DecodeOrder = DecodeOrderZcOverlap in {
 def C_LDSP_RV32 : CStackLoad<0b011, "c.ldsp", GPRPairNoX0RV32, uimm9_lsb000>,
                   Sched<[WriteLDD, ReadMemBase]> {
   let Inst{4-2} = imm{8-6};
@@ -65,7 +66,7 @@ def C_SD_RV32 : CStore_rri<0b111, "c.sd", GPRPairCRV32, uimm8_lsb000>,
   let Inst{12-10} = imm{5-3};
   let Inst{6-5} = imm{7-6};
 }
-}// Predicates = [HasStdExtZclsd, IsRV32], DecoderNamespace = "ZcOverlap"
+}// Predicates = [HasStdExtZclsd, IsRV32], DecodeOrder = DecodeOrderZcOverlap
 
 //===----------------------------------------------------------------------===//
 // Assembler Pseudo Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
index 50ebaa9951979..286e2bf722531 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
@@ -48,7 +48,7 @@ def SSPUSH : RVInstR<0b1100111, 0b100, OPC_SYSTEM, (outs), (ins GPRX1X5:$rs2),
 } // Predicates = [HasStdExtZicfiss]
 
 let Predicates = [HasStdExtZicfiss, HasStdExtZcmop],
-    DecoderNamespace = "Zicfiss" in {
+    DecodeOrder = DecodeOrderZicfiss16 in {
 let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
 def C_SSPUSH : RVC_SSInst<0b00001, GPRX1, "c.sspush">;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
index a3203f288b545..a6f58e06b7362 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
@@ -33,11 +33,11 @@ def riscv_st_rv32 : RVSDNode<"SD_RV32", SDT_RISCV_SD_RV32,
 // Instructions
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtZilsd, IsRV32], DecoderNamespace = "RV32Only" in {
+let Predicates = [HasStdExtZilsd, IsRV32], DecodeOrder = DecodeOrderRV32Only32 in {
 def LD_RV32 : Load_ri<0b011, "ld", GPRPairRV32>, Sched<[WriteLDD, ReadMemBase]>;
 def SD_RV32 : Store_rri<0b011, "sd", GPRPairRV32>,
               Sched<[WriteSTD, ReadStoreData, ReadMemBase]>;
-} // Predicates = [HasStdExtZilsd, IsRV32], DecoderNamespace = "RV32Only"
+} // Predicates = [HasStdExtZilsd, IsRV32], DecodeOrder = DecodeOrderRV32Only32
 
 //===----------------------------------------------------------------------===//
 // Assembler Pseudo Instructions
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 8747d02ac892b..5cf5dd93fbed5 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -116,6 +116,11 @@ static cl::opt<bool> IgnoreFullyDefinedOperands(
         "Do not automatically decode operands with no '?' in their encoding."),
     cl::init(false), cl::cat(DisassemblerEmitterCat));
 
+static cl::opt<bool> ResolveConflictsTryAll(
+    "resolve-conflicts-try-all",
+    cl::desc("Resolve conflicts by attempting to decode all candidates."),
+    cl::init(false), cl::cat(DisassemblerEmitterCat));
+
 STATISTIC(NumEncodings, "Number of encodings considered");
 STATISTIC(NumEncodingsLackingDisasm,
           "Number of encodings without disassembler info");
@@ -188,6 +193,9 @@ class InstructionEncoding {
   /// The namespace in which this encoding exists.
   StringRef DecoderNamespace;
 
+  /// The decoder order.
+  int64_t DecodeOrder;
+
   /// Known bits of this encoding. This is the value of the `Inst` field
   /// with any variable references replaced with '?'.
   KnownBits InstBits;
@@ -224,6 +232,9 @@ class InstructionEncoding {
   /// Returns the namespace in which this encoding exists.
   StringRef getDecoderNamespace() const { return DecoderNamespace; }
 
+  /// Returns the decoder order for this encoding.
+  int64_t getDecodeOrder() const { return DecodeOrder; }
+
   /// Returns the size of this encoding, in bits.
   unsigned getBitWidth() const { return InstBits.getBitWidth(); }
 
@@ -263,16 +274,20 @@ class InstructionEncoding {
   void parseFixedLenOperands(const BitsInit &Bits);
 };
 
-/// Sorting predicate to sort encoding IDs by encoding width.
-class LessEncodingIDByWidth {
+/// Sorting predicate to sort encoding IDs by encoding width. Within the same
+/// width, sort them by their decode order.
+class LessEncodingID {
   ArrayRef<InstructionEncoding> Encodings;
 
 public:
-  explicit LessEncodingIDByWidth(ArrayRef<InstructionEncoding> Encodings)
+  explicit LessEncodingID(ArrayRef<InstructionEncoding> Encodings)
       : Encodings(Encodings) {}
 
   bool operator()(unsigned ID1, unsigned ID2) const {
-    return Encodings[ID1].getBitWidth() < Encodings[ID2].getBitWidth();
+    const InstructionEncoding &E1 = Encodings[ID1];
+    const InstructionEncoding &E2 = Encodings[ID2];
+    return std::tuple(E1.getBitWidth(), E1.getDecodeOrder()) <
+           std::tuple(E2.getBitWidth(), E2.getDecodeOrder());
   }
 };
 
@@ -517,6 +532,13 @@ class FilterChooser {
   /// The "field value" here refers to the encoding bits in the filtered range.
   std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
 
+  /// Per decode order filter choosers. Applicable when the set of candidates
+  /// have more than 1 decode order.
+  SmallVector<std::unique_ptr<const FilterChooser>> PerDecodeOrderChoosers;
+
+  /// Handle this chooser by attempting to decode all endodings.
+  bool AttemptAll = false;
+
   /// Set to true if decoding conflict was encountered.
   bool HasConflict = false;
 
@@ -532,7 +554,7 @@ class FilterChooser {
                 ArrayRef<unsigned> EncodingIDs)
       : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(nullptr) {
     // Sort encoding IDs once.
-    stable_sort(this->EncodingIDs, LessEncodingIDByWidth(Encodings));
+    stable_sort(this->EncodingIDs, LessEncodingID(Encodings));
     // Filter width is the width of the smallest encoding.
     unsigned FilterWidth = Encodings[this->EncodingIDs.front()].getBitWidth();
     FilterBits = KnownBits(FilterWidth);
@@ -545,7 +567,7 @@ class FilterChooser {
                 const FilterChooser &Parent)
       : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(&Parent) {
     // Inferior filter choosers are created from sorted array of encoding IDs.
-    assert(is_sorted(EncodingIDs, LessEncodingIDByWidth(Encodings)));
+    assert(is_sorted(EncodingIDs, LessEncodingID(Encodings)));
     assert(!FilterBits.hasConflict() && "Broken filter");
     // Filter width is the width of the smallest encoding.
     unsigned FilterWidth = Encodings[EncodingIDs.front()].getBitWidth();
@@ -584,6 +606,17 @@ class FilterChooser {
   // decoded bits in order to verify that the instruction matches the Opcode.
   std::vector<Island> getIslands(const KnownBits &EncodingBits) const;
 
+  int64_t getDecodeOrder(unsigned ID) const {
+    return Encodings[ID].getDecodeOrder();
+  }
+
+  /// Returns true if the set of encoding IDs have more than one decode order.
+  bool hasMultipleDecodeOrders() const;
+
+  // Split the set of candidate encodings into one bucket per decode order and
+  // create inferior FilterChoosers per bucket.
+  void splitByDecodeOrder();
+
   /// Scans the well-known encoding bits of the encodings and, builds up a list
   /// of candidate filters, and then returns the best one, if any.
   std::unique_ptr<Filter> findBestFilter(ArrayRef<bitAttr_t> BitAttrs,
@@ -685,6 +718,44 @@ void FilterChooser::applyFilter(const Filter &F) {
   NumBits = F.NumBits;
   assert(FilterBits.extractBits(NumBits, StartBit).isUnknown());
 
+  // When a filter has both fixed and variable encodings, we give priority to
+  // the fixed encoding (FilteredIDs) and if that fails, we attempt the variable
+  // encodings. See DecoderTableBuilder::emitTableEntries. This order may not be
+  // the right order for certain backends. To control this, they can use the
+  // DecodeOrder. If we have multiple decode orders, the filter chooser will
+  // attempt the fixed-then-variable encoding per decode order, so if we want
+  // certain variable encoding to be prioritized over fixed ones, the fixed ones
+  // can get a larger decode order.
+
+  // If we have multiple decode order, we want to attempt decoding in the
+  // following order: fixed0, variable0, fixed1, variable1 etc.
+  // If we do not split, we will attempt to decode as: fixed, variable.
+  // That may be ok if all fixed IDs have decode order <= all variable IDs, that
+  // is max(fixed decode order) <= min(variable decode order). Otherwise we
+  // split per decode order.
+  if (hasMultipleDecodeOrders() && !F.VariableIDs.empty() &&
+      !F.FilteredIDs.empty()) {
+    auto LessDecodeOrder = [&](unsigned A, unsigned B) {
+      return getDecodeOrder(A) < getDecodeOrder(B);
+    };
+
+    int64_t MaxFixedOrder = std::numeric_limits<int64_t>::min();
+    for (ArrayRef<unsigned> InferiorEncodingIDs :
+         make_second_range(F.FilteredIDs)) {
+      auto MaxIt = llvm::max_element(InferiorEncodingIDs, LessDecodeOrder);
+      int64_t CurrentMax = getDecodeOrder(*MaxIt);
+      MaxFixedOrder = std::max(MaxFixedOrder, CurrentMax);
+    }
+
+    auto MinIt = llvm::min_element(F.VariableIDs, LessDecodeOrder);
+    int64_t MinVariableOrder = getDecodeOrder(*MinIt);
+
+    if (MaxFixedOrder > MinVariableOrder) {
+      splitByDecodeOrder();
+      return;
+    }
+  }
+
   if (!F.VariableIDs.empty()) {
     // Delegates to an inferior filter chooser for further processing on this
     // group of instructions whose segment values are variable.
@@ -1580,6 +1651,42 @@ std::unique_ptr<Filter> FilterChooser::findBestFilter() const {
   return nullptr;
 }
 
+bool FilterChooser::hasMultipleDecodeOrders() const {
+  if (EncodingIDs.size() <= 1)
+    return false;
+  // Encodings are sorted by decoder order, so there are multiple decode orders
+  // if the first and last decode order do not match.
+  return getDecodeOrder(EncodingIDs.front()) !=
+         getDecodeOrder(EncodingIDs.back());
+}
+
+void FilterChooser::splitByDecodeOrder() {
+  if (!hasMultipleDecodeOrders())
+    PrintFatalError("Cannot split by decode orders for a single decode order");
+
+  // Create one inferior FilterChooser per decode order span.
+  ArrayRef<unsigned> IDs = ArrayRef(EncodingIDs);
+  int64_t LastOrder = getDecodeOrder(IDs.front());
+  size_t LastIndex = 0;
+  // Note: first iteration here is redundant, but this allows us to keep Idx
+  // value correct.
+  for (const auto &[Idx, ID] : enumerate(IDs)) {
+    int64_t CurrentOrder = getDecodeOrder(ID);
+    if (CurrentOrder != LastOrder) {
+      ArrayRef<unsigned> SubIDs = IDs.slice(LastIndex, Idx - LastIndex);
+      PerDecodeOrderChoosers.push_back(std::make_unique<FilterChooser>(
+          Encodings, SubIDs, FilterBits, *this));
+      LastOrder = CurrentOrder;
+      LastIndex = Idx;
+    }
+  }
+
+  // Finish the last span.
+  ArrayRef<unsigned> SubIDs = IDs.slice(LastIndex, IDs.size() - LastIndex);
+  PerDecodeOrderChoosers.push_back(
+      std::make_unique<FilterChooser>(Encodings, SubIDs, FilterBits, *this));
+}
+
 // Decides on the best configuration of filter(s) to use in order to decode
 // the instructions.  A conflict of instructions may occur, in which case we
 // dump the conflict set to the standard error.
@@ -1598,6 +1705,18 @@ void FilterChooser::doFilter() {
     return;
   }
 
+  // If there are multiple decode orders, then splin the candidates by decode
+  // order if we are unable to find a filter.
+  if (hasMultipleDecodeOrders()) {
+    splitByDecodeOrder();
+    return;
+  }
+
+  if (ResolveConflictsTryAll) {
+    AttemptAll = true;
+    return;
+  }
+
   // Print out useful conflict information for postmortem analysis.
   errs() << "Decoding Conflict:\n";
   dump();
@@ -1626,10 +1745,10 @@ void DecoderTableBuilder::emitTableEntries(const FilterChooser &FC) const {
 
   // If there are other encodings that could match if those with all bits
   // known don't, enter a scope so that they have a chance.
-  size_t FixupLoc = 0;
+  size_t VarScopeFixupLoc = 0;
   if (FC.VariableFC) {
     Table.insertOpcode(MCD::OPC_Scope);
-    FixupLoc = Table.insertNumToSkip();
+    VarScopeFixupLoc = Table.insertNumToSkip();
   }
 
   if (FC.SingletonEncodingID) {
@@ -1649,7 +1768,7 @@ void DecoderTableBuilder::emitTableEntries(const FilterChooser &FC) const {
 
     // Emit table entries for the only case.
     emitTableEntries(*Delegate);
-  } else {
+  } else if (FC.FilterChooserMap.size() > 1) {
     // The general case: emit a switch over the field value.
     Table.insertOpcode(MCD::OPC_ExtractField);
     Table.insertULEB128(FC.StartBit);
@@ -1676,10 +1795,34 @@ void DecoderTableBuilder::emitTableEntries(const FilterChooser &FC) const {
 
     // Emit table entries for the last case.
     emitTableEntries(*Delegate);
+  } else if (FC.PerDecodeOrderChoosers.size() > 1) {
+    // Attempt to decode all inferior choosers and allow them to fail, except
+    // the last one.
+    for (const auto &Delegate : drop_end(FC.PerDecodeOrderChoosers)) {
+      Table.insertOpcode(MCD::OPC_Scope);
+      unsigned FixupLoc = Table.insertNumToSkip();
+      emitTableEntries(*Delegate);
+      Table.patchNumToSkip(FixupLoc, Table.size());
+    }
+    emitTableEntries(*FC.PerDecodeOrderChoosers.back());
+  } else if (FC.AttemptAll) {
+    // Attempt all encoding and allow them to fail, except the last one.
+    for (const auto ID : drop_end(FC.EncodingIDs)) {
+      Table.insertOpcode(MCD::OPC_Scope);
+      unsigned FixupLoc = Table.insertNumToSkip();
+      FilterChooser SingletonFC(FC.Encodings, ID, FC.FilterBits, *FC.Parent);
+      emitSingletonTableEntry(SingletonFC);
+      Table.patchNumToSkip(FixupLoc, Table.size());
+    }
+    FilterChooser LastSingletonFC(FC.Encodings, FC.EncodingIDs.back(),
+                                  FC.FilterBits, *FC.Parent);
+    emitSingletonTableEntry(LastSingletonFC);
+  } else {
+    llvm_unreachable("FilterChooser not setup to do any filtering");
   }
 
   if (FC.VariableFC) {
-    Table.patchNumToSkip(FixupLoc, Table.size());
+    Table.patchNumToSkip(VarScopeFixupLoc, Table.size());
     emitTableEntries(*FC.VariableFC);
   }
 }
@@ -2070,6 +2213,7 @@ InstructionEncoding::InstructionEncoding(const Record *EncodingDef,
   Name.append(InstDef->getName());
 
   DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace");
+  DecodeOrder = EncodingDef->getValueAsInt("DecodeOrder");
   DecoderMethod = EncodingDef->getValueAsString("DecoderMethod");
   if (!DecoderMethod.empty())
     HasCompleteDecoder = EncodingDef->getValueAsBit("hasCompleteDecoder");

>From ee404cb3d6e2bc4f902e7828b7e9d643342a8d64 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 10 Sep 2025 15:39:03 -0700
Subject: [PATCH 2/5] Rework some comments

---
 llvm/utils/TableGen/DecoderEmitter.cpp | 50 ++++++++++++++++----------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 5cf5dd93fbed5..1265850910cfe 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -193,7 +193,7 @@ class InstructionEncoding {
   /// The namespace in which this encoding exists.
   StringRef DecoderNamespace;
 
-  /// The decoder order.
+  /// The decode order.
   int64_t DecodeOrder;
 
   /// Known bits of this encoding. This is the value of the `Inst` field
@@ -232,7 +232,7 @@ class InstructionEncoding {
   /// Returns the namespace in which this encoding exists.
   StringRef getDecoderNamespace() const { return DecoderNamespace; }
 
-  /// Returns the decoder order for this encoding.
+  /// Returns the decode order for this encoding.
   int64_t getDecodeOrder() const { return DecodeOrder; }
 
   /// Returns the size of this encoding, in bits.
@@ -533,7 +533,7 @@ class FilterChooser {
   std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
 
   /// Per decode order filter choosers. Applicable when the set of candidates
-  /// have more than 1 decode order.
+  /// have more than 1 decode orders.
   SmallVector<std::unique_ptr<const FilterChooser>> PerDecodeOrderChoosers;
 
   /// Handle this chooser by attempting to decode all endodings.
@@ -606,6 +606,7 @@ class FilterChooser {
   // decoded bits in order to verify that the instruction matches the Opcode.
   std::vector<Island> getIslands(const KnownBits &EncodingBits) const;
 
+  // Returns the decode order for the given encoding ID.
   int64_t getDecodeOrder(unsigned ID) const {
     return Encodings[ID].getDecodeOrder();
   }
@@ -721,18 +722,25 @@ void FilterChooser::applyFilter(const Filter &F) {
   // When a filter has both fixed and variable encodings, we give priority to
   // the fixed encoding (FilteredIDs) and if that fails, we attempt the variable
   // encodings. See DecoderTableBuilder::emitTableEntries. This order may not be
-  // the right order for certain backends. To control this, they can use the
+  // the right order for certain targets. To control this, they can use the
   // DecodeOrder. If we have multiple decode orders, the filter chooser will
-  // attempt the fixed-then-variable encoding per decode order, so if we want
-  // certain variable encoding to be prioritized over fixed ones, the fixed ones
-  // can get a larger decode order.
-
-  // If we have multiple decode order, we want to attempt decoding in the
-  // following order: fixed0, variable0, fixed1, variable1 etc.
-  // If we do not split, we will attempt to decode as: fixed, variable.
-  // That may be ok if all fixed IDs have decode order <= all variable IDs, that
-  // is max(fixed decode order) <= min(variable decode order). Otherwise we
-  // split per decode order.
+  // effectively attempt the fixed-then-variable encoding per decode order. This
+  // allows targets to prioritize certain variable encoding over fixed ones by
+  // assigning the fixed ones a larger decode order.
+
+  // If we always split per decode order, we get the following attempt order
+  //   f[0] -> v[0] -> f[1] -> v[1]
+  //
+  // However, instead of always splitting a chooser with multiple decode orders
+  // we can split only when necessary and only the smallest number of splits
+  // as long as we effectively get the same attempt order as above. As an
+  // example, if the max decode order among the fixed encoding is <= min
+  // decode order among varying encoding, then even if we do not split, we get
+  // the same effective attempt order. This simple criterion is implemented
+  // below (i.e., we fully split when this criterion is not met). There may be
+  // more refined ways to do the decode order splitting. For example, not
+  // splitting all the decode orders fully but doing partial splits. This can
+  // be implemented in future as an optimization if desired.
   if (hasMultipleDecodeOrders() && !F.VariableIDs.empty() &&
       !F.FilteredIDs.empty()) {
     auto LessDecodeOrder = [&](unsigned A, unsigned B) {
@@ -1654,7 +1662,7 @@ std::unique_ptr<Filter> FilterChooser::findBestFilter() const {
 bool FilterChooser::hasMultipleDecodeOrders() const {
   if (EncodingIDs.size() <= 1)
     return false;
-  // Encodings are sorted by decoder order, so there are multiple decode orders
+  // Encodings are sorted by decode order, so there are multiple decode orders
   // if the first and last decode order do not match.
   return getDecodeOrder(EncodingIDs.front()) !=
          getDecodeOrder(EncodingIDs.back());
@@ -1669,7 +1677,7 @@ void FilterChooser::splitByDecodeOrder() {
   int64_t LastOrder = getDecodeOrder(IDs.front());
   size_t LastIndex = 0;
   // Note: first iteration here is redundant, but this allows us to keep Idx
-  // value correct.
+  // value correct (as opposed to using Idx + 1).
   for (const auto &[Idx, ID] : enumerate(IDs)) {
     int64_t CurrentOrder = getDecodeOrder(ID);
     if (CurrentOrder != LastOrder) {
@@ -1705,8 +1713,9 @@ void FilterChooser::doFilter() {
     return;
   }
 
-  // If there are multiple decode orders, then splin the candidates by decode
-  // order if we are unable to find a filter.
+  // If we were unable to find a useful filter and there are multiple decode
+  // orders involved, split the candidates by decode order and create per decode
+  // order choosers.
   if (hasMultipleDecodeOrders()) {
     splitByDecodeOrder();
     return;
@@ -1797,7 +1806,8 @@ void DecoderTableBuilder::emitTableEntries(const FilterChooser &FC) const {
     emitTableEntries(*Delegate);
   } else if (FC.PerDecodeOrderChoosers.size() > 1) {
     // Attempt to decode all inferior choosers and allow them to fail, except
-    // the last one.
+    // the last one. Note that `PerDecodeOrderChoosers` when present is expected
+    // to have at least 2 entries, hence the size() > 1 check above.
     for (const auto &Delegate : drop_end(FC.PerDecodeOrderChoosers)) {
       Table.insertOpcode(MCD::OPC_Scope);
       unsigned FixupLoc = Table.insertNumToSkip();
@@ -1807,6 +1817,8 @@ void DecoderTableBuilder::emitTableEntries(const FilterChooser &FC) const {
     emitTableEntries(*FC.PerDecodeOrderChoosers.back());
   } else if (FC.AttemptAll) {
     // Attempt all encoding and allow them to fail, except the last one.
+    // We expect here to have > 1 EncodingIDs, else we could have created a
+    // singleton chooser.
     for (const auto ID : drop_end(FC.EncodingIDs)) {
       Table.insertOpcode(MCD::OPC_Scope);
       unsigned FixupLoc = Table.insertNumToSkip();

>From bc83e8193fc1ef5f21a70806e6a9f9a345d49a0d Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 10 Sep 2025 17:58:34 -0700
Subject: [PATCH 3/5] Print decode order when dumping

---
 llvm/utils/TableGen/DecoderEmitter.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 1265850910cfe..b4305cf6cd66f 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -1740,12 +1740,17 @@ void FilterChooser::dump() const {
   // Dump filter stack.
   dumpStack(errs(), Indent, PadToWidth);
 
+  bool PrintDecoderOrder = hasMultipleDecodeOrders();
+
   // Dump encodings.
   for (unsigned EncodingID : EncodingIDs) {
     const InstructionEncoding &Encoding = Encodings[EncodingID];
     errs() << Indent << indent(PadToWidth - Encoding.getBitWidth());
     printKnownBits(errs(), Encoding.getMandatoryBits(), '_');
-    errs() << "  " << Encoding.getName() << '\n';
+    errs() << "  " << Encoding.getName();
+    if (PrintDecoderOrder)
+      errs() << " (" << Encoding.getDecodeOrder() << ")";
+    errs() << '\n';
   }
 }
 

>From b6ea8cec04601c4007e48f3c5f719bdaffde8d8c Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 11 Sep 2025 00:24:36 -0700
Subject: [PATCH 4/5] Fix resolve-conflicts-try-all using a non-failing scope,
 adopt it for RISCV 16 bit insts

---
 llvm/include/llvm/MC/MCDecoderOps.h     |   1 +
 llvm/lib/Target/RISCV/CMakeLists.txt    |   2 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.td |   4 +-
 llvm/utils/TableGen/DecoderEmitter.cpp  | 102 ++++++++++++++++++------
 4 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/llvm/include/llvm/MC/MCDecoderOps.h b/llvm/include/llvm/MC/MCDecoderOps.h
index 790ff3eb4f333..3636748f5924b 100644
--- a/llvm/include/llvm/MC/MCDecoderOps.h
+++ b/llvm/include/llvm/MC/MCDecoderOps.h
@@ -17,6 +17,7 @@ namespace llvm::MCD {
 // enabled.
 enum DecoderOps {
   OPC_Scope = 1,         // OPC_Scope(nts_t NumToSkip)
+  OPC_ScopeNoFail,       // OPC_ScopeNoFail(nts_t NumToSkip)
   OPC_ExtractField,      // OPC_ExtractField(uleb128 Start, uint8_t Len)
   OPC_FilterValueOrSkip, // OPC_FilterValueOrSkip(uleb128 Val, nts_t NumToSkip)
   OPC_FilterValue,       // OPC_FilterValue(uleb128 Val)
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 0ff178e1f1959..7957176c3b6e7 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -8,7 +8,7 @@ tablegen(LLVM RISCVGenCompressInstEmitter.inc -gen-compress-inst-emitter)
 tablegen(LLVM RISCVGenMacroFusion.inc -gen-macro-fusion-pred)
 tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler
-              --specialize-decoders-per-bitwidth)
+              --specialize-decoders-per-bitwidth --resolve-conflicts-try-all)
 tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM RISCVGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 5c68e37eb552c..e48d2ebddc249 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -2318,8 +2318,8 @@ def : Pat<(int_riscv_pause), (FENCE 0x1, 0x0)>;
 
 // For 16 bit instructions.
 defvar DecodeOrderZicfiss16 = -1;
-defvar DecodeOrderRV32Only16 = 1;
-defvar DecodeOrderZcOverlap = 2;
+defvar DecodeOrderRV32Only16 = 0;//1;
+defvar DecodeOrderZcOverlap = 0;//2;
 
 // For 32-bit instructions.
 defvar DecodeOrderRV32Only32 = 1;
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index b4305cf6cd66f..cb0f930170fd2 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -801,6 +801,26 @@ unsigned Filter::usefulness() const {
 //                              //
 //////////////////////////////////
 
+static StringRef getDecoderOpName(uint8_t Op) {
+  // clang-format off
+  static constexpr StringLiteral Names[] = {
+    "OPC_Scope",
+    "OPC_ScopeNoFail",
+    "OPC_ExtractField",
+    "OPC_FilterValueOrSkip",
+    "OPC_FilterValue",
+    "OPC_CheckField",
+    "OPC_CheckPredicate",
+    "OPC_Decode",
+    "OPC_TryDecode",
+    "OPC_SoftFail",
+  };
+  // clang-format on
+  if (Op >= MCD::OPC_Scope && Op <= MCD::OPC_SoftFail)
+    return Names[Op - MCD::OPC_Scope];
+  llvm_unreachable("Unknown decoder op");
+}
+
 // Emit the decoder state machine table. Returns a mask of MCD decoder ops
 // that were emitted.
 unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
@@ -880,20 +900,19 @@ unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
 
     const uint8_t DecoderOp = *I++;
     OpcodeMask |= (1 << DecoderOp);
+    OS << "  MCD::" << getDecoderOpName(DecoderOp) << ", ";
     switch (DecoderOp) {
     default:
       PrintFatalError("Invalid decode table opcode: " + Twine((int)DecoderOp) +
                       " at index " + Twine(Pos));
-    case MCD::OPC_Scope: {
-      OS << "  MCD::OPC_Scope, ";
+    case MCD::OPC_Scope:
+    case MCD::OPC_ScopeNoFail: {
       uint32_t NumToSkip = emitNumToSkip(I, OS);
       emitNumToSkipComment(NumToSkip);
       OS << '\n';
       break;
     }
     case MCD::OPC_ExtractField: {
-      OS << "  MCD::OPC_ExtractField, ";
-
       // ULEB128 encoded start value.
       const char *ErrMsg = nullptr;
       unsigned Start = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
@@ -908,7 +927,6 @@ unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
       break;
     }
     case MCD::OPC_FilterValueOrSkip: {
-      OS << "  MCD::OPC_FilterValueOrSkip, ";
       // The filter value is ULEB128 encoded.
       emitULEB128(I, OS);
       uint32_t NumToSkip = emitNumToSkip(I, OS);
@@ -917,14 +935,12 @@ unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
       break;
     }
     case MCD::OPC_FilterValue: {
-      OS << "  MCD::OPC_FilterValue, ";
       // The filter value is ULEB128 encoded.
       emitULEB128(I, OS);
       OS << '\n';
       break;
     }
     case MCD::OPC_CheckField: {
-      OS << "  MCD::OPC_CheckField, ";
       // ULEB128 encoded start value.
       emitULEB128(I, OS);
       // 8-bit length.
@@ -936,7 +952,6 @@ unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
       break;
     }
     case MCD::OPC_CheckPredicate: {
-      OS << "  MCD::OPC_CheckPredicate, ";
       emitULEB128(I, OS);
       OS << '\n';
       break;
@@ -949,7 +964,6 @@ unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
       unsigned Opc = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
       assert(ErrMsg == nullptr && "ULEB128 value too large!");
 
-      OS << "  MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, ";
       emitULEB128(I, OS);
 
       // Decoder index.
@@ -970,7 +984,6 @@ unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
       break;
     }
     case MCD::OPC_SoftFail: {
-      OS << "  MCD::OPC_SoftFail, ";
       // Decode the positive mask.
       const char *ErrMsg = nullptr;
       uint64_t PositiveMask = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
@@ -1814,7 +1827,7 @@ void DecoderTableBuilder::emitTableEntries(const FilterChooser &FC) const {
     // the last one. Note that `PerDecodeOrderChoosers` when preset is expected
     // to have atleast 2 entries, hence the size() > 1 check above.
     for (const auto &Delegate : drop_end(FC.PerDecodeOrderChoosers)) {
-      Table.insertOpcode(MCD::OPC_Scope);
+      Table.insertOpcode(MCD::OPC_ScopeNoFail);
       unsigned FixupLoc = Table.insertNumToSkip();
       emitTableEntries(*Delegate);
       Table.patchNumToSkip(FixupLoc, Table.size());
@@ -1825,7 +1838,7 @@ void DecoderTableBuilder::emitTableEntries(const FilterChooser &FC) const {
     // We expect here to have > 1 EncodingIDs, else we could have created a
     // singleton chooser.
     for (const auto ID : drop_end(FC.EncodingIDs)) {
-      Table.insertOpcode(MCD::OPC_Scope);
+      Table.insertOpcode(MCD::OPC_ScopeNoFail);
       unsigned FixupLoc = Table.insertNumToSkip();
       FilterChooser SingletonFC(FC.Encodings, ID, FC.FilterBits, *FC.Parent);
       emitSingletonTableEntry(SingletonFC);
@@ -2305,7 +2318,26 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
   }
 
   OS << R"(
-  SmallVector<const uint8_t *, 8> ScopeStack;
+  struct Scope {
+    const uint8_t *SkipTo;
+    // Indicates a non failing scope, which means we will never return with
+    // failure from the decode function when in this scope. It also implies that
+    // any nested scopes within it will be forced to be non-failing as well.
+    bool NoFail;
+    Scope(const uint8_t *SkipTo, bool NoFail)
+      : SkipTo(SkipTo), NoFail(NoFail) {}
+  };
+
+  SmallVector<Scope, 8> ScopeStack;
+
+  // Returns if we are allowed to fail and return from the function in the
+  // current scope.
+  auto CanReturnOnFailure = [&ScopeStack]() -> bool {
+    if (ScopeStack.empty())
+      return true;
+    return !ScopeStack.back().NoFail;
+  };
+
   uint64_t CurFieldValue = 0;
   DecodeStatus S = MCDisassembler::Success;
   while (true) {
@@ -2316,12 +2348,18 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       errs() << Loc << ": Unexpected decode table opcode: "
              << (int)DecoderOp << '\n';
       return MCDisassembler::Fail;
-    case MCD::OPC_Scope: {
+    case MCD::OPC_Scope:
+    case MCD::OPC_ScopeNoFail: {
       unsigned NumToSkip = decodeNumToSkip(Ptr);
       const uint8_t *SkipTo = Ptr + NumToSkip;
-      ScopeStack.push_back(SkipTo);
-      LLVM_DEBUG(dbgs() << Loc << ": OPC_Scope(" << SkipTo - DecodeTable
-                        << ")\n");
+      bool NoFail = DecoderOp == MCD::OPC_ScopeNoFail;
+      NoFail |= !CanReturnOnFailure();
+      ScopeStack.emplace_back(SkipTo, NoFail);
+      LLVM_DEBUG({
+        const char *OpName = DecoderOp == MCD::OPC_Scope ? "OPC_Scope" : "OPC_ScopeNoFail";
+        dbgs() << Loc << ": " << OpName << "(" << SkipTo - DecodeTable
+               << "NoFail = " << NoFail << ")\n";
+      });
       break;
     }
     case MCD::OPC_ExtractField: {
@@ -2366,7 +2404,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
           LLVM_DEBUG(dbgs() << "returning Fail\n");
           return MCDisassembler::Fail;
         }
-        Ptr = ScopeStack.pop_back_val();
+        Ptr = ScopeStack.pop_back_val().SkipTo;
         LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
       }
       break;
@@ -2394,7 +2432,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
           LLVM_DEBUG(dbgs() << "returning Fail\n");
           return MCDisassembler::Fail;
         }
-        Ptr = ScopeStack.pop_back_val();
+        Ptr = ScopeStack.pop_back_val().SkipTo;
         LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
       }
       break;
@@ -2415,7 +2453,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
           LLVM_DEBUG(dbgs() << "returning Fail\n");
           return MCDisassembler::Fail;
         }
-        Ptr = ScopeStack.pop_back_val();
+        Ptr = ScopeStack.pop_back_val().SkipTo;
         LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
       }
       break;
@@ -2440,8 +2478,17 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
 
       LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
                    << ", using decoder " << DecodeIdx << ": "
-                   << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
-      return S;
+                   << (S != MCDisassembler::Fail ? "PASS" : "FAIL"));
+      if (S == MCDisassembler::Success || CanReturnOnFailure()) {
+        LLVM_DEBUG(dbgs() << ", Returning\n");
+        return S;
+      }
+      MI.clear();
+      Ptr = ScopeStack.pop_back_val().SkipTo;
+      LLVM_DEBUG(dbgs() << ", continuing at " << Ptr - DecodeTable << '\n');
+      // Reset the decode status.
+      S = MCDisassembler::Success;
+      break;
     })";
   if (HasTryDecode) {
     OS << R"(
@@ -2460,16 +2507,19 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
 
       if (DecodeComplete) {
         // Decoding complete.
-        LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
-        MI = TmpMI;
-        return S;
+        LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL"));
+        if (S == MCDisassembler::Success || CanReturnOnFailure()) {
+          MI = TmpMI;
+          LLVM_DEBUG(dbgs() << ", Returning\n");
+          return S;
+        }
       }
       assert(S == MCDisassembler::Fail);
       if (ScopeStack.empty()) {
         LLVM_DEBUG(dbgs() << "FAIL, returning FAIL\n");
         return MCDisassembler::Fail;
       }
-      Ptr = ScopeStack.pop_back_val();
+      Ptr = ScopeStack.pop_back_val().SkipTo;
       LLVM_DEBUG(dbgs() << "FAIL, continuing at " << Ptr - DecodeTable << '\n');
       // Reset decode status. This also drops a SoftFail status that could be
       // set before the decode attempt.

>From e94d5b2ab95a98afc71abd22b84af5b898f2b534 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 11 Sep 2025 11:30:39 -0700
Subject: [PATCH 5/5] Split scope stack into 2

---
 llvm/utils/TableGen/DecoderEmitter.cpp | 98 ++++++++++++++++----------
 1 file changed, 60 insertions(+), 38 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index cb0f930170fd2..7ba75afd9f3ea 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -2317,29 +2317,41 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
 )";
   }
 
+  // We maintain 2 scope stacks. One is ScopeStack which is used in conjunction
+  // with OPC_Scope and it encodes the "regular" forward traversal through the
+  // decoder table in response to Field or Predicate check failures.
+  //
+  // The other is NoFailScopeStack, which is used in conjunction with
+  // OPC_ScopeNoFail. It encodes nested checks that, on any failure, continue
+  // with further checks and never return from the decode function with a
+  // failure. This is used when we need to attempt multiple decoding
+  // possibilities and a failure anywhere is not a signal to return from the
+  // decode function but to reset some state and continue.
+  //
+  // ScopeStack can be thought of as nested within the NoFailScopeStack. A
+  // "failure" will result in clearing the ScopeStack and continuing execution
+  // at the top of the NoFailScopeStack, if it's not empty, else returning from
+  // the decode function with a failure.
+
   OS << R"(
-  struct Scope {
-    const uint8_t *SkipTo;
-    // Indicates a non failing scope, which means we will never return with
-    // failure from the decode function when in this scope. It also implies that
-    // any nested scopes within it will be forced to be non-failing as well.
-    bool NoFail;
-    Scope(const uint8_t *SkipTo, bool NoFail)
-      : SkipTo(SkipTo), NoFail(NoFail) {}
-  };
+  SmallVector<const uint8_t *> ScopeStack;
+  SmallVector<const uint8_t *> NoFailScopeStack;
 
-  SmallVector<Scope, 8> ScopeStack;
+  uint64_t CurFieldValue = 0;
+  DecodeStatus S = MCDisassembler::Success;
 
-  // Returns if we are allowed to fail and return from the function in the
-  // current scope.
-  auto CanReturnOnFailure = [&ScopeStack]() -> bool {
-    if (ScopeStack.empty())
-      return true;
-    return !ScopeStack.back().NoFail;
+  // Handle a return with failure. Returns true if we can actually return from
+  // the decode function, else adjust the state and return the continuation
+  // point.
+  auto PopScope = [&](bool CheckScopeStack) -> std::pair<bool, const uint8_t*> {
+    if (CheckScopeStack && !ScopeStack.empty())
+      return {false, ScopeStack.pop_back_val()};
+    ScopeStack.resize(0);
+    if (!NoFailScopeStack.empty())
+      return {false, NoFailScopeStack.pop_back_val()};
+    return {true, nullptr};
   };
 
-  uint64_t CurFieldValue = 0;
-  DecodeStatus S = MCDisassembler::Success;
   while (true) {
     ptrdiff_t Loc = Ptr - DecodeTable;
     const uint8_t DecoderOp = *Ptr++;
@@ -2352,13 +2364,11 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
     case MCD::OPC_ScopeNoFail: {
       unsigned NumToSkip = decodeNumToSkip(Ptr);
       const uint8_t *SkipTo = Ptr + NumToSkip;
-      bool NoFail = DecoderOp == MCD::OPC_ScopeNoFail;
-      NoFail |= !CanReturnOnFailure();
-      ScopeStack.emplace_back(SkipTo, NoFail);
+      SmallVector<const uint8_t *> &Stack = DecoderOp == MCD::OPC_ScopeNoFail ? NoFailScopeStack : ScopeStack;
+      Stack.push_back(SkipTo);
       LLVM_DEBUG({
         const char *OpName = DecoderOp == MCD::OPC_Scope ? "OPC_Scope" : "OPC_ScopeNoFail";
-        dbgs() << Loc << ": " << OpName << "(" << SkipTo - DecodeTable
-               << "NoFail = " << NoFail << ")\n";
+        dbgs() << Loc << ": " << OpName << "(" << SkipTo - DecodeTable << ")\n";
       });
       break;
     }
@@ -2400,11 +2410,12 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
                         << (Failed ? "FAIL, " : "PASS\n"));
 
       if (Failed) {
-        if (ScopeStack.empty()) {
+        auto Ret = PopScope(/*CheckScopeStack=*/true);
+        if (Ret.first) {
           LLVM_DEBUG(dbgs() << "returning Fail\n");
           return MCDisassembler::Fail;
         }
-        Ptr = ScopeStack.pop_back_val().SkipTo;
+        Ptr = Ret.second;
         LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
       }
       break;
@@ -2428,11 +2439,12 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
                         << FieldValue << ", ExpectedValue = " << ExpectedValue
                         << ": " << (Failed ? "FAIL, " : "PASS\n"););
       if (Failed) {
-        if (ScopeStack.empty()) {
+        auto Ret = PopScope(/*CheckScopeStack=*/true);
+        if (Ret.first) {
           LLVM_DEBUG(dbgs() << "returning Fail\n");
           return MCDisassembler::Fail;
         }
-        Ptr = ScopeStack.pop_back_val().SkipTo;
+        Ptr = Ret.second;
         LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
       }
       break;
@@ -2449,11 +2461,12 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
                         << (Failed ? "FAIL, " : "PASS\n"););
 
       if (Failed) {
-        if (ScopeStack.empty()) {
+        auto Ret = PopScope(/*CheckScopeStack=*/true);
+        if (Ret.first) {
           LLVM_DEBUG(dbgs() << "returning Fail\n");
           return MCDisassembler::Fail;
         }
-        Ptr = ScopeStack.pop_back_val().SkipTo;
+        Ptr = Ret.second;
         LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
       }
       break;
@@ -2479,15 +2492,21 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
                    << ", using decoder " << DecodeIdx << ": "
                    << (S != MCDisassembler::Fail ? "PASS" : "FAIL"));
-      if (S == MCDisassembler::Success || CanReturnOnFailure()) {
+      if (S != MCDisassembler::Fail) {
         LLVM_DEBUG(dbgs() << ", Returning\n");
         return S;
       }
-      MI.clear();
-      Ptr = ScopeStack.pop_back_val().SkipTo;
-      LLVM_DEBUG(dbgs() << ", continuing at " << Ptr - DecodeTable << '\n');
+      // We ignore the scope stack and just check NoFail stack here.
+      auto Ret = PopScope(/*CheckScopeStack=*/false);
+      if (Ret.first) {
+        LLVM_DEBUG(dbgs() << "returning Fail\n");
+        return MCDisassembler::Fail;
+      }
       // Reset the decode status.
+      MI.clear();
       S = MCDisassembler::Success;
+      Ptr = Ret.second;
+      LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
       break;
     })";
   if (HasTryDecode) {
@@ -2508,22 +2527,25 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       if (DecodeComplete) {
         // Decoding complete.
         LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL"));
-        if (S == MCDisassembler::Success || CanReturnOnFailure()) {
+        if (S != MCDisassembler::Fail) {
           MI = TmpMI;
           LLVM_DEBUG(dbgs() << ", Returning\n");
           return S;
         }
       }
-      assert(S == MCDisassembler::Fail);
-      if (ScopeStack.empty()) {
+      // If decoding was complete, we only check the NoFail stack, else we
+      // check both stacks when popping the scope.
+      auto Ret = PopScope(/*CheckScopeStack=*/!DecodeComplete);
+
+      if (Ret.first) {
         LLVM_DEBUG(dbgs() << "FAIL, returning FAIL\n");
         return MCDisassembler::Fail;
       }
-      Ptr = ScopeStack.pop_back_val().SkipTo;
-      LLVM_DEBUG(dbgs() << "FAIL, continuing at " << Ptr - DecodeTable << '\n');
       // Reset decode status. This also drops a SoftFail status that could be
       // set before the decode attempt.
       S = MCDisassembler::Success;
+      Ptr = Ret.second;
+      LLVM_DEBUG(dbgs() << "FAIL, continuing at " << Ptr - DecodeTable << '\n');
       break;
     })";
   }