[llvm] [AMDGPU][True16][MC] 16bit src modifier and vdst support in asm/disasm (PR #104510)

Fri Aug 16 13:20:09 PDT 2024

https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/104510

>From 3bf43638265874282d5ffb77ae12230f7628f18b Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 15 Aug 2024 17:25:54 -0400
Subject: [PATCH 1/2] true16 support in MC

---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  35 ++-
 .../Disassembler/AMDGPUDisassembler.cpp       |  20 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         | 208 +++++++++++++-----
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |  45 ++--
 4 files changed, 214 insertions(+), 94 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1a10206eea2374..fd10e0202560d3 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -280,8 +280,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
   }
 
-  bool isRegOrImmWithIntT16InputMods() const {
-    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
+  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
+    return isRegOrImmWithInputMods(
+        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
   }
 
   bool isRegOrImmWithInt32InputMods() const {
@@ -292,6 +293,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
   }
 
+  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
+    return isRegOrInline(
+        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
+  }
+
   bool isRegOrInlineImmWithInt32InputMods() const {
     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
   }
@@ -304,8 +310,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
   }
 
-  bool isRegOrImmWithFPT16InputMods() const {
-    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
+  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
+    return isRegOrImmWithInputMods(
+        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
   }
 
   bool isRegOrImmWithFP32InputMods() const {
@@ -354,6 +361,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   }
 
   bool isVRegWithInputMods() const;
+  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
   template <bool IsFake16> bool isT16VRegWithInputMods() const;
 
   bool isSDWAOperand(MVT type) const;
@@ -515,7 +523,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
   }
 
-  bool isVCSrcTB16() const {
+  bool isVCSrcT_b16() const {
     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
   }
 
@@ -545,7 +553,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
   }
 
-  bool isVCSrcTF16() const {
+  bool isVCSrcT_f16() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
+  }
+
+  bool isVCSrcT_bf16() const {
-    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
+    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
   }
 
@@ -583,7 +595,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
 
-  bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
+  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
 
   bool isVSrcT_b16_Lo128() const {
     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
@@ -617,7 +629,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
 
-  bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
+  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
 
   bool isVSrcT_bf16_Lo128() const {
     return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
@@ -2162,11 +2174,16 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
           AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
 }
 
-template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
+template <bool IsFake16> bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
   return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                              : AMDGPU::VGPR_16_Lo128RegClassID);
 }
 
+template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
+  return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
+                             : AMDGPU::VGPR_16RegClassID);
+}
+
 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
   if (AsmParser->isVI())
     return isVReg32();
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1a0dc7098347ac..ada6c3f2632b25 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -328,36 +328,40 @@ DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
   return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
 }
 
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+          unsigned OperandSemantics>
 static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                 uint64_t /*Addr*/,
                                                 const MCDisassembler *Decoder) {
   assert(isUInt<9>(Imm) && "9-bit encoding expected");
 
   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
-  bool IsVGPR = Imm & (1 << 8);
-  if (IsVGPR) {
+  if (Imm & AMDGPU::EncValues::IS_VGPR) { // Mask the VGPR flag bit.
     bool IsHi = Imm & (1 << 7);
     unsigned RegIdx = Imm & 0x7f;
     return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
   }
-  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
-                                                   Imm & 0xFF, false, 16));
+  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
+                               OpWidth, Imm & 0xFF, false, ImmWidth,
+                               (AMDGPU::OperandSemantics)OperandSemantics));
 }
 
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+          unsigned OperandSemantics>
 static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                           uint64_t /*Addr*/,
                                           const MCDisassembler *Decoder) {
   assert(isUInt<10>(Imm) && "10-bit encoding expected");
 
   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
-  bool IsVGPR = Imm & (1 << 8);
-  if (IsVGPR) {
+  if (Imm & AMDGPU::EncValues::IS_VGPR) { // Mask the VGPR flag bit.
     bool IsHi = Imm & (1 << 9);
     unsigned RegIdx = Imm & 0xff;
     return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
   }
-  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
-                                                   Imm & 0xFF, false, 16));
+  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
+                               OpWidth, Imm & 0xFF, false, ImmWidth,
+                               (AMDGPU::OperandSemantics)OperandSemantics));
 }
 
 static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index e99b43afd1c3a2..0901c915b41d9c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1207,9 +1207,11 @@ class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
 }
 
 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
-def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
-  let Name = "RegOrImmWithFPT16InputMods";
-  let PredicateMethod = "isRegOrImmWithFPT16InputMods";
+class FPT16InputModsMatchClass<bit IsFake16> : FPInputModsMatchClass<16> {
+  let Name = !if(IsFake16, "RegOrImmWithFPFake16InputMods",
+                 "RegOrImmWithFPT16InputMods");
+  let PredicateMethod = "isRegOrImmWithFPT16InputMods<" #
+                        !if(IsFake16, "true", "false") # ">";
 }
 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -1234,12 +1236,19 @@ class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
 }
 
 def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
-def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
+class FPT16InputMods<bit IsFake16> : FPInputMods<FPT16InputModsMatchClass<IsFake16>> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
+def FP32T16DstInputMods : FPInputMods<FP32InputModsMatchClass> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
 
-class FP16VCSrcInputMods<bit IsFake16>
-  : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
+class FPT16VCSrcInputMods<bit IsFake16 = 1>
+    : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
 def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
 
 class IntInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1251,21 +1260,38 @@ class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize>
   let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
   let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
 }
-def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
-  let Name = "RegOrImmWithIntT16InputMods";
-  let PredicateMethod = "isRegOrImmWithIntT16InputMods";
+class IntT16InputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
+  let Name = !if(IsFake16, "RegOrImmWithIntFake16InputMods",
+                 "RegOrImmWithIntT16InputMods");
+  let PredicateMethod = "isRegOrImmWithIntT16InputMods<" #
+                        !if(IsFake16, "true", "false") # ">";
 }
 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
 def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
+class IntT16VCSrcInputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
+  let Name = !if(IsFake16, "RegOrInlineImmWithIntFake16InputMods",
+                 "RegOrInlineImmWithIntT16InputMods");
+  let PredicateMethod = "isRegOrInlineImmWithIntT16InputMods<" #
+                        !if(IsFake16, "true", "false") # ">";
+}
 
 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
   let PrintMethod = "printOperandAndIntInputMods";
 }
-def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
+class IntT16InputMods<bit IsFake16> : IntInputMods<IntT16InputModsMatchClass<IsFake16>> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
+def Int32T16DstInputMods : IntInputMods<Int32InputModsMatchClass> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
 def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
+class IntT16VCSrcInputMods<bit IsFake16 = 1>
+    : IntInputMods<IntT16VCSrcInputModsMatchClass<IsFake16>> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
 
 class OpSelModsMatchClass : AsmOperandClass {
   let Name = "OpSelMods";
@@ -1299,6 +1325,23 @@ def FPVRegInputModsMatchClass : AsmOperandClass {
   let PredicateMethod = "isVRegWithInputMods";
 }
 
+def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
+  let PrintMethod = "printOperandAndFPInputMods";
+}
+
+def FPVRegT16DstInputMods : InputMods <FPVRegInputModsMatchClass> {
+  let PrintMethod = "printOperandAndFPInputMods";
+  let EncoderMethod = "getMachineOpValueT16";
+}
+
+class FPT16_Lo128VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
+  let Name = !if(IsFake16, "Fake16_Lo128VRegWithFPInputMods",
+                 "T16_Lo128VRegWithFPInputMods");
+  let ParserMethod = "parseRegWithFPInputMods";
+  let PredicateMethod = "isT16_Lo128VRegWithInputMods<" #
+                        !if(IsFake16, "true", "false") # ">";
+}
+
 class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
   let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
                  "T16VRegWithFPInputMods");
@@ -1307,13 +1350,16 @@ class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
                         !if(IsFake16, "true", "false") # ">";
 }
 
-def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
+class FPT16_Lo128VRegInputMods<bit IsFake16 = 1>
+    : InputMods <FPT16_Lo128VRegInputModsMatchClass<IsFake16>> {
   let PrintMethod = "printOperandAndFPInputMods";
+  let EncoderMethod = "getMachineOpValueT16Lo128";
 }
 
-class FPT16VRegInputMods<bit IsFake16>
+class FPT16VRegInputMods<bit IsFake16 = 1>
     : InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
   let PrintMethod = "printOperandAndFPInputMods";
+  let EncoderMethod = "getMachineOpValueT16";
 }
 
 class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1344,7 +1390,15 @@ def IntVRegInputModsMatchClass : AsmOperandClass {
   let PredicateMethod = "isVRegWithInputMods";
 }
 
-class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
+class IntT16_Lo128VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass {
+  let Name = !if(IsFake16, "Fake16_Lo128VRegWithIntInputMods",
+                 "T16_Lo128VRegWithIntInputMods");
+  let ParserMethod = "parseRegWithIntInputMods";
+  let PredicateMethod = "isT16_Lo128VRegWithInputMods<" #
+                        !if(IsFake16, "true", "false") # ">";
+}
+
+class IntT16VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass {
   let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
                  "T16VRegWithIntInputMods");
   let ParserMethod = "parseRegWithIntInputMods";
@@ -1352,15 +1406,27 @@ class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
                         !if(IsFake16, "true", "false") # ">";
 }
 
-class IntT16VRegInputMods<bit IsFake16>
+class IntT16_Lo128VRegInputMods<bit IsFake16 = 1>
+    : InputMods <IntT16_Lo128VRegInputModsMatchClass<IsFake16>> {
+  let PrintMethod = "printOperandAndIntInputMods";
+  let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
+class IntT16VRegInputMods<bit IsFake16 = 1>
     : InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
   let PrintMethod = "printOperandAndIntInputMods";
+  let EncoderMethod = "getMachineOpValueT16";
 }
 
 def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
   let PrintMethod = "printOperandAndIntInputMods";
 }
 
+def IntVRegT16DstInputMods : InputMods <IntVRegInputModsMatchClass> {
+  let PrintMethod = "printOperandAndIntInputMods";
+  let EncoderMethod = "getMachineOpValueT16";
+}
+
 class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
   let Name = "PackedFP"#opSize#"InputMods";
   let ParserMethod = "parseRegOrImmWithFPInputMods";
@@ -1587,7 +1653,7 @@ class getSOPSrcForVT<ValueType VT> {
 }
 
 // Returns the vreg register class to use for source operand given VT
-class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
+class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
   RegisterOperand ret =
   !cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
         !eq(VT.Size, 96)  : RegisterOperand<VReg_96>,
@@ -1629,12 +1695,12 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
 }
 
 // Src2 of VOP3 DPP instructions cannot be a literal
-class getVOP3DPPSrcForVT<ValueType VT> {
+class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
   RegisterOperand ret =
   !cond(!eq(VT, i1)     : SSrc_i1,
-        !eq(VT, i16)    : VCSrc_b16,
-        !eq(VT, f16)    : VCSrc_f16,
-        !eq(VT, bf16)   : VCSrc_bf16,
+        !eq(VT, i16)    : !if (IsFake16, VCSrcT_b16, VCSrc_b16),
+        !eq(VT, f16)    : !if (IsFake16, VCSrcT_f16, VCSrc_f16),
+        !eq(VT, bf16)   : !if (IsFake16, VCSrcT_bf16, VCSrc_bf16),
         !eq(VT, v2i16)  : VCSrc_v2b16,
         !eq(VT, v2f16)  : VCSrc_v2f16,
         !eq(VT, v2bf16) : VCSrc_v2bf16,
@@ -1668,23 +1734,27 @@ class isModifierType<ValueType SrcVT> {
                 !eq(SrcVT.Value, v16bf16.Value));
 }
 
-// Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
-  Operand ret =  !if(!eq(VT.Size, 64),
+// Return type of input modifiers operand for specified input operand.
+// True16: If the destination is a 16-bit value, the src0 modifier must hold
+// dst's opsel bit. Pass a dummy DstVT when querying a source other than src0.
+// 64-bit src types are not implemented for True16 dst.
+class getSrc0Mod <ValueType VT, ValueType DstVT, bit IsTrue16 = 0, bit IsFake16 = 1> {
+  defvar T16Dst =  !if(!eq(VT.Size, 64),
+                     !if(VT.isFP, FP64InputMods, Int64InputMods),
+                     !if(!eq(VT.Size, 16),
+                         !if(VT.isFP, !if(IsTrue16, FPT16InputMods<IsFake16>, FP16InputMods),
+                                      !if(IsTrue16, IntT16InputMods<IsFake16>, IntOpSelMods)),
+                         !if(VT.isFP, FP32T16DstInputMods, Int32T16DstInputMods)));
+  defvar Normal =  !if(!eq(VT.Size, 64),
                      !if(VT.isFP, FP64InputMods, Int64InputMods),
                      !if(!eq(VT.Size, 16),
-                         !if(VT.isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods),
-                                      !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
+                         !if(VT.isFP, !if(IsTrue16, FPT16InputMods<IsFake16>, FP16InputMods),
+                                      !if(IsTrue16, IntT16InputMods<IsFake16>, IntOpSelMods)),
                          !if(VT.isFP, FP32InputMods, Int32InputMods)));
+  Operand ret = !if(!and(IsTrue16, !eq(DstVT.Size, 16)), T16Dst, Normal);
 }
 
-class getOpSelMod <ValueType VT> {
-  Operand ret = !cond(!eq(VT, f16) : FP16InputMods,
-                      !eq(VT, bf16) : FP16InputMods,
-                      !eq(VT, v2f16) : PackedF16InputMods,
-                      !eq(VT, v2bf16) : PackedF16InputMods,
-                      1 : IntOpSelMods);
-}
+class getSrcMod<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> : getSrc0Mod<VT, f128/*Dummy Arg*/, IsTrue16, IsFake16>;
 
 // Return type of input modifiers operand specified input operand for DPP
 class getSrcModDPP <ValueType VT> {
@@ -1695,18 +1765,42 @@ class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
   Operand ret =
       !if (VT.isFP,
            !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
-                FPT16VRegInputMods<IsFake16>, FPVRegInputMods),
+                FPT16_Lo128VRegInputMods<IsFake16>, FPVRegInputMods),
            !if (!eq(VT.Value, i16.Value),
-                IntT16VRegInputMods<IsFake16>, IntVRegInputMods));
+                IntT16_Lo128VRegInputMods<IsFake16>, IntVRegInputMods));
 }
 
 // Return type of input modifiers operand for specified input operand for DPP
-class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> {
+// True16: If the destination is a 16-bit value, the src0 modifier must hold
+// dst's opsel bit. Pass a dummy DstVT when querying a source other than src0.
+// 64-bit src types are not implemented for True16 dst.
+class getSrc0ModVOP3DPP <ValueType VT, ValueType DstVT, bit IsFake16 = 1> {
+  defvar T16Dst =
+      !if (VT.isFP,
+           !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
+                FPT16VRegInputMods<IsFake16>, FPVRegT16DstInputMods),
+           !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods<IsFake16>,
+                IntVRegT16DstInputMods));
+  defvar Normal =
+      !if (VT.isFP,
+           !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
+                FPT16VRegInputMods<IsFake16>, FPVRegInputMods),
+           !if (!eq(VT.Value, i16.Value),
+                IntT16VRegInputMods<IsFake16>,
+                IntVRegInputMods));
+  Operand ret = !if(!and(!not(IsFake16), !eq(DstVT.Size, 16)), T16Dst, Normal);
+}
+
+// GFX11 only supports VGPR src1, but the restriction is done in AsmParser
+// and GCNDPPCombine.
+class getSrcModVOP3DPP<ValueType VT, bit IsFake16 = 1> {
   Operand ret =
       !if (VT.isFP,
            !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
-                FP16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
-           Int32VCSrcInputMods);
+                FPT16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
+           !if (!eq(VT.Value, i16.Value),
+                IntT16VCSrcInputMods<IsFake16>,
+                Int32VCSrcInputMods));
 }
 
 // Return type of input modifiers operand specified input operand for SDWA
@@ -1747,9 +1841,11 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
             (ins Src0Mod:$src0_modifiers, Src0RC:$src0)))
       /* else */,
         // VOP1 without modifiers
-        !if (HasClamp,
-          (ins Src0RC:$src0, Clamp0:$clamp),
-          (ins Src0RC:$src0))
+        !if(HasOMod,
+          (ins Src0RC:$src0, Clamp0:$clamp, omod0:$omod),
+          !if (HasClamp,
+            (ins Src0RC:$src0, Clamp0:$clamp),
+            (ins Src0RC:$src0)))
       /* endif */ ),
     !if (!eq(NumSrcArgs, 2),
       !if (HasModifiers,
@@ -2322,13 +2418,13 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
   field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
-  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
+  field Operand Src0Mod = getSrc0Mod<Src0VT, DstVT>.ret;
   field Operand Src1Mod = getSrcMod<Src1VT>.ret;
   field Operand Src2Mod = getSrcMod<Src2VT>.ret;
   field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
   field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
   field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
-  field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
+  field Operand Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
   field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret;
   field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
   field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
@@ -2419,9 +2515,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                                    Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
   field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                 NumSrcArgs, HasClamp, HasOMod,
-                                getOpSelMod<Src0VT>.ret,
-                                getOpSelMod<Src1VT>.ret,
-                                getOpSelMod<Src2VT>.ret>.ret;
+                                Src0Mod, Src1Mod, Src2Mod>.ret;
   field dag InsDPP = !if(HasExtDPP,
                          getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                    HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
@@ -2509,7 +2603,6 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
 
   // Most DstVT are 16-bit, but not all.
   let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
-  let DstRC64 = getVALUDstForVT<DstVT>.ret;
   let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
   let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
   let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
@@ -2519,7 +2612,9 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
   let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
   let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
   let Src0VOP3DPP = VGPRSrc_16;
-  let Src0ModVOP3DPP = getSrcModVOP3DPP<Src0VT, 0 /*IsFake16*/>.ret;
+  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret;
+  let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0 /*IsFake16*/>.ret;
+  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
   let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
   let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;
 
@@ -2527,9 +2622,9 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
   let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
   let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
   let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;
-  let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret;
-  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret;
-  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret;
+  let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 0/*IsFake16*/>.ret;
+  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 0/*IsFake16*/>.ret;
 }
 
 class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
@@ -2541,9 +2636,18 @@ class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
   let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
   let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
   let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
-  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
-  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+  let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 1/*IsFake16*/>.ret;
+  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 1/*IsFake16*/>.ret;
+  let Src0VOP3DPP = VGPRSrc_32;
+  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1 /*IsFake16*/>.ret;
+  let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1 /*IsFake16*/>.ret;
+  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
 }
 
 def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index d3e39464fea396..443797ef0ff4bf 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1131,10 +1131,17 @@ def OperandSemantics {
 class SrcRegOrImm9<RegisterClass regClass, string opWidth, string operandType,
                    int immWidth, int OperandSemantics>
     : RegOrImmOperand<regClass, operandType> {
-  let DecoderMethod = "decodeSrcRegOrImm9<AMDGPUDisassembler::" # opWidth #
+  string DecoderMethodName = "decodeSrcRegOrImm9";
+  let DecoderMethod = DecoderMethodName # "<AMDGPUDisassembler::" # opWidth #
                       ", " # immWidth # ", " # OperandSemantics # ">";
 }
 
+class SrcRegOrImm9_t16<string operandType, int OperandSemantics, RegisterClass regClass = VS_16>
+    : SrcRegOrImm9<regClass, "OPW16", operandType, 16, OperandSemantics> {
+  let DecoderMethodName = "decodeOperand_VSrcT16";
+  let EncoderMethod = "getMachineOpValueT16";
+}
+
 def SSrc_b16 : SrcRegOrImm9 <SReg_32, "OPW32", "OPERAND_REG_IMM_INT16", 16, OperandSemantics.INT>;
 def SSrc_bf16: SrcRegOrImm9 <SReg_32, "OPW32", "OPERAND_REG_IMM_BF16", 16, OperandSemantics.BF16>;
 def SSrc_f16 : SrcRegOrImm9 <SReg_32, "OPW32", "OPERAND_REG_IMM_FP16", 16, OperandSemantics.FP16>;
@@ -1175,32 +1182,16 @@ def VSrc_bf16 : SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_IMM_BF16", 16, Operan
 def VSrc_f16 : SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_IMM_FP16", 16, OperandSemantics.FP16>;
 
 // True16 VOP3 operands.
-def VSrcT_b16 : RegOrImmOperand <VS_16, "OPERAND_REG_IMM_INT16"> {
-  let EncoderMethod = "getMachineOpValueT16";
-  let DecoderMethod = "decodeOperand_VSrcT16";
-}
-def VSrcT_bf16 : RegOrImmOperand <VS_16, "OPERAND_REG_IMM_BF16"> {
-  let EncoderMethod = "getMachineOpValueT16";
-  let DecoderMethod = "decodeOperand_VSrcT16";
-}
-def VSrcT_f16 : RegOrImmOperand <VS_16, "OPERAND_REG_IMM_FP16"> {
-  let EncoderMethod = "getMachineOpValueT16";
-  let DecoderMethod = "decodeOperand_VSrcT16";
-}
+def VSrcT_b16 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_INT16", OperandSemantics.INT>;
+def VSrcT_bf16 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_BF16", OperandSemantics.BF16>;
+def VSrcT_f16 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_FP16", OperandSemantics.FP16>;
 
 // True16 VOP1/2/C operands.
-def VSrcT_b16_Lo128 : RegOrImmOperand <VS_16_Lo128, "OPERAND_REG_IMM_INT16"> {
-  let EncoderMethod = "getMachineOpValueT16Lo128";
-  let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
-}
-def VSrcT_bf16_Lo128 : RegOrImmOperand <VS_16_Lo128, "OPERAND_REG_IMM_BF16"> {
-  let EncoderMethod = "getMachineOpValueT16Lo128";
-  let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
-}
-def VSrcT_f16_Lo128 : RegOrImmOperand <VS_16_Lo128, "OPERAND_REG_IMM_FP16"> {
-  let EncoderMethod = "getMachineOpValueT16Lo128";
-  let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
-}
+let DecoderMethodName = "decodeOperand_VSrcT16_Lo128", EncoderMethod = "getMachineOpValueT16Lo128" in {
+  def VSrcT_b16_Lo128 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_INT16", OperandSemantics.INT, VS_16_Lo128>;
+  def VSrcT_bf16_Lo128 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_BF16", OperandSemantics.BF16, VS_16_Lo128>;
+  def VSrcT_f16_Lo128 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_FP16",OperandSemantics.FP16, VS_16_Lo128>;
+} // End DecoderMethodName = "decodeOperand_VSrcT16_Lo128", EncoderMethod = "getMachineOpValueT16Lo128"
 
 // The current and temporary future default used case for fake VOP1/2/C.
 // For VOP1,2,C True16 instructions. _Lo128 use first 128 32-bit VGPRs only.
@@ -1303,6 +1294,10 @@ def VCSrc_v2b16 : SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_INLINE_C_V2INT16",
 def VCSrc_v2bf16: SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_INLINE_C_V2BF16", 16, OperandSemantics.BF16>;
 def VCSrc_v2f16 : SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_INLINE_C_V2FP16", 16, OperandSemantics.FP16>;
 
+// True16 operands.
+def VCSrcT_b16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_INT16", OperandSemantics.INT>;
+def VCSrcT_bf16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_BF16", OperandSemantics.BF16>;
+def VCSrcT_f16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_FP16", OperandSemantics.FP16>;
 //===----------------------------------------------------------------------===//
 //  VISrc_* Operands with a VGPR or an inline constant
 //===----------------------------------------------------------------------===//

>From 25063d39b5d014fa64432fb79c4fe37c540e705a Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Fri, 16 Aug 2024 16:19:54 -0400
Subject: [PATCH 2/2] tmp

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.td        |  2 +-
 llvm/lib/Target/AMDGPU/VOP2Instructions.td   | 12 +++---
 llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s | 40 ++++++++++----------
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 0901c915b41d9c..a16f00bda0d2b4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2413,7 +2413,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
   field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
   field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
-  field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
+  field RegisterOperand Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPRSrc_16, VGPRSrc_32);
   field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
   field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index fccaa27f361381..7a0885b5ee7d48 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -923,14 +923,14 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
   let Src1ModSDWA = Int16SDWAInputMods;
 }
 def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
-  let Src1RC32 = RegisterOperand<VGPR_16_Lo128>;
-  let Src1DPP = RegisterOperand<VGPR_16_Lo128>;
-  let Src1ModDPP = IntT16VRegInputMods<0/*IsFake16*/>;
+  let Src1Mod = IntT16InputMods<0/*IsFake16*/>;
+  let Src1ModDPP = IntT16_Lo128VRegInputMods<0/*IsFake16*/>;
+  let Src1ModVOP3DPP = IntT16VCSrcInputMods<0/*IsFake16*/>;
 }
 def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
-  let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
-  let Src1DPP = RegisterOperand<VGPR_32_Lo128>;
-  let Src1ModDPP = IntT16VRegInputMods<1/*IsFake16*/>;
+  let Src1Mod = Int32InputMods;
+  let Src1ModDPP = IntT16_Lo128VRegInputMods<1/*IsFake16*/>;
+  let Src1ModVOP3DPP = IntT16VCSrcInputMods<1/*IsFake16*/>;
 }
 
 let isReMaterializable = 1 in {
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s
index 7c50b4c22fc294..76b1c38fad43d9 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s
@@ -1,5 +1,5 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
 
 v_add_f16_e32 v255, v1, v2
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -13,8 +13,8 @@ v_fmac_f16_e32 v255, v1, v2
 v_fmamk_f16_e32 v255, v1, 0xfe0b, v3
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_e32 v255, v1, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_e32 v255.l, v1.l, v2.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_e32 v255, v1, v2
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -43,8 +43,8 @@ v_fmac_f16_e32 v5, v255, v2
 v_fmamk_f16_e32 v5, v255, 0xfe0b, v3
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_e32 v5, v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_e32 v5.l, v255.l, v2.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_e32 v5, v255, v2
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -73,8 +73,8 @@ v_fmac_f16_e32 v5, v1, v255
 v_fmamk_f16_e32 v5, v1, 0xfe0b, v255
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_e32 v5, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_e32 v5.l, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_e32 v5, v1, v255
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -97,8 +97,8 @@ v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
 v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -121,8 +121,8 @@ v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
 v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -145,8 +145,8 @@ v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
 v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -169,8 +169,8 @@ v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -193,8 +193,8 @@ v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
 v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -217,8 +217,8 @@ v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
 v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
-v_ldexp_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode