[llvm] de3d0ee - Revert "Revert "[MIR] Target specific MIR formating and parsing""

Daniel Sanders via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 9 11:10:08 PST 2020



> On Jan 9, 2020, at 11:07, Benjamin Kramer <benny.kra at gmail.com> wrote:
> 
> On Thu, Jan 9, 2020 at 5:03 AM Daniel Sanders via llvm-commits
> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
>> 
>> 
>> Author: Daniel Sanders
>> Date: 2020-01-08T20:03:29-08:00
>> New Revision: de3d0ee023cb14c06d5be01369ef8db4cbfa16b4
>> 
>> URL: https://github.com/llvm/llvm-project/commit/de3d0ee023cb14c06d5be01369ef8db4cbfa16b4
>> DIFF: https://github.com/llvm/llvm-project/commit/de3d0ee023cb14c06d5be01369ef8db4cbfa16b4.diff
>> 
>> LOG: Revert "Revert "[MIR] Target specific MIR formating and parsing""
>> 
>> There was an unguarded dereference of MF in a function that permitted
>> nullptr. Fixed
>> 
>> This reverts commit 71d64f72f934631aa2f12b9542c23f74f256f494.
>> 
>> Added:
>>    llvm/include/llvm/CodeGen/MIRFormatter.h
>> 
>> Modified:
>>    llvm/include/llvm/CodeGen/MIRParser/MIParser.h
>>    llvm/include/llvm/CodeGen/MachineMemOperand.h
>>    llvm/include/llvm/CodeGen/MachineOperand.h
>>    llvm/include/llvm/CodeGen/PseudoSourceValue.h
>>    llvm/include/llvm/CodeGen/TargetInstrInfo.h
>>    llvm/include/llvm/Target/TargetMachine.h
>>    llvm/lib/CodeGen/MIRParser/MILexer.cpp
>>    llvm/lib/CodeGen/MIRParser/MILexer.h
>>    llvm/lib/CodeGen/MIRParser/MIParser.cpp
>>    llvm/lib/CodeGen/MIRPrinter.cpp
>>    llvm/lib/CodeGen/MachineInstr.cpp
>>    llvm/lib/CodeGen/MachineOperand.cpp
>>    llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
>>    llvm/lib/Target/TargetMachine.cpp
>>    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
>>    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
>>    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
>>    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
>>    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
>>    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
>>    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
>>    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
>>    llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
>>    llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
>>    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
>>    llvm/unittests/CodeGen/MachineOperandTest.cpp
>> 
>> Removed:
>> 
>> 
>> 
>> ################################################################################
>> diff  --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h
>> new file mode 100644
>> index 000000000000..e57c32c5ae61
>> --- /dev/null
>> +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h
>> @@ -0,0 +1,83 @@
>> +//===-- llvm/CodeGen/MIRFormatter.h -----------------------------*- C++ -*-===//
>> +//
>> +//                     The LLVM Compiler Infrastructure
>> +//
>> +// This file is distributed under the University of Illinois Open Source
>> +// License. See LICENSE.TXT for details.
>> +//
>> +//===----------------------------------------------------------------------===//
>> +//
>> +// This file contains the declaration of the MIRFormatter class.
>> +//
>> +//===----------------------------------------------------------------------===//
>> +
>> +#ifndef LLVM_CODEGEN_MIRFORMATTER_H
>> +#define LLVM_CODEGEN_MIRFORMATTER_H
>> +
>> +#include "llvm/CodeGen/MachineInstr.h"
>> +#include "llvm/CodeGen/PseudoSourceValue.h"
>> +
>> +namespace llvm {
>> +
>> +struct PerFunctionMIParsingState;
>> +struct SlotMapping;
>> +
>> +/// MIRFormatter - Interface to format MIR operands based on the target.
>> +class MIRFormatter {
>> +public:
>> +  typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
>> +      ErrorCallbackType;
>> +
>> +  MIRFormatter() {}
>> +  virtual ~MIRFormatter() = default;
>> +
>> +  /// Implement target specific printing for machine operand immediate value, so
>> +  /// that we can have more meaningful mnemonic than a 64-bit integer. Passing
>> +  /// None to OpIdx means the index is unknown.
>> +  virtual void printImm(raw_ostream &OS, const MachineInstr &MI,
>> +                        Optional<unsigned> OpIdx, int64_t Imm) const {
>> +    OS << Imm;
>> +  }
>> +
>> +  /// Implement target specific parsing of immediate mnemonics. The mnemonic is
>> +  /// a sequence of dot-separated strings.
>> +  virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
>> +                                StringRef Src, int64_t &Imm,
>> +                                ErrorCallbackType ErrorCallback) const {
>> +    llvm_unreachable("target did not implement parsing MIR immediate mnemonic");
>> +  }
>> +
>> +  /// Implement target specific printing of target custom pseudo source value.
>> +  /// Default implementation is not necessarily the correct MIR serialization
>> +  /// format.
>> +  virtual void
>> +  printCustomPseudoSourceValue(raw_ostream &OS, ModuleSlotTracker &MST,
>> +                               const PseudoSourceValue &PSV) const {
>> +    PSV.printCustom(OS);
>> +  }
>> +
>> +  /// Implement target specific parsing of target custom pseudo source value.
>> +  virtual bool parseCustomPseudoSourceValue(
>> +      StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
>> +      const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const {
>> +    llvm_unreachable(
>> +        "target did not implement parsing MIR custom pseudo source value");
>> +  }
>> +
>> +  /// Helper functions to print IR value as MIR serialization format which will
>> +  /// be useful for target specific printer, e.g. for printing IR value in
>> +  /// custom pseudo source value.
>> +  static void printIRValue(raw_ostream &OS, const Value &V,
>> +                           ModuleSlotTracker &MST);
>> +
>> +  /// Helper functions to parse IR value from MIR serialization format which
>> +  /// will be useful for target specific parser, e.g. for parsing IR value for
>> +  /// custom pseudo source value.
>> +  static bool parseIRValue(StringRef Src, MachineFunction &MF,
>> +                           PerFunctionMIParsingState &PFS, const Value *&V,
>> +                           ErrorCallbackType ErrorCallback);
>> +};
>> +
>> +} // end namespace llvm
>> +
>> +#endif
>> 
>> diff  --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
>> index 4e32a04551c1..8ca665b23b28 100644
>> --- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
>> +++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
>> @@ -171,12 +171,16 @@ struct PerFunctionMIParsingState {
>>   DenseMap<unsigned, unsigned> ConstantPoolSlots;
>>   DenseMap<unsigned, unsigned> JumpTableSlots;
>> 
>> +  /// Maps from slot numbers to function's unnamed values.
>> +  DenseMap<unsigned, const Value *> Slots2Values;
>> +
>>   PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
>>                             const SlotMapping &IRSlots,
>>                             PerTargetMIParsingState &Target);
>> 
>>   VRegInfo &getVRegInfo(unsigned Num);
>>   VRegInfo &getVRegInfoNamed(StringRef RegName);
>> +  const Value *getIRValue(unsigned Slot);
>> };
>> 
>> /// Parse the machine basic block definitions, and skip the machine
>> 
>> diff  --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h
>> index 7ee700c62a25..b0243646b06c 100644
>> --- a/llvm/include/llvm/CodeGen/MachineMemOperand.h
>> +++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h
>> @@ -26,6 +26,7 @@ namespace llvm {
>> 
>> class FoldingSetNodeID;
>> class MDNode;
>> +class MIRFormatter;
>> class raw_ostream;
>> class MachineFunction;
>> class ModuleSlotTracker;
>> @@ -295,7 +296,8 @@ class MachineMemOperand {
>>   /// @{
>>   void print(raw_ostream &OS, ModuleSlotTracker &MST,
>>              SmallVectorImpl<StringRef> &SSNs, const LLVMContext &Context,
>> -             const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const;
>> +             const MachineFrameInfo *MFI, const TargetInstrInfo *TII,
>> +             const MIRFormatter *MIRF) const;
>>   /// @}
>> 
>>   friend bool operator==(const MachineMemOperand &LHS,
>> 
>> diff  --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h
>> index df914dc2d85e..4222c03b023a 100644
>> --- a/llvm/include/llvm/CodeGen/MachineOperand.h
>> +++ b/llvm/include/llvm/CodeGen/MachineOperand.h
>> @@ -278,6 +278,9 @@ class MachineOperand {
>>   /// More complex way of printing a MachineOperand.
>>   /// \param TypeToPrint specifies the generic type to be printed on uses and
>>   /// defs. It can be determined using MachineInstr::getTypeToPrint.
>> +  /// \param OpIdx - specifies the index of the operand in machine instruction.
>> +  /// This will be used by target dependent MIR formatter. Could be None if the
>> +  /// index is unknown, e.g. called by dump().
>>   /// \param PrintDef - whether we want to print `def` on an operand which
>>   /// isDef. Sometimes, if the operand is printed before '=', we don't print
>>   /// `def`.
>> @@ -294,8 +297,9 @@ class MachineOperand {
>>   /// information from it's parent.
>>   /// \param IntrinsicInfo - same as \p TRI.
>>   void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint,
>> -             bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies,
>> -             unsigned TiedOperandIdx, const TargetRegisterInfo *TRI,
>> +             Optional<unsigned> OpIdx, bool PrintDef, bool IsStandalone,
>> +             bool ShouldPrintRegisterTies, unsigned TiedOperandIdx,
>> +             const TargetRegisterInfo *TRI,
>>              const TargetIntrinsicInfo *IntrinsicInfo) const;
>> 
>>   /// Same as print(os, TRI, IntrinsicInfo), but allows to specify the low-level
>> 
>> diff  --git a/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/llvm/include/llvm/CodeGen/PseudoSourceValue.h
>> index 4b3cc9145a13..593a865ea545 100644
>> --- a/llvm/include/llvm/CodeGen/PseudoSourceValue.h
>> +++ b/llvm/include/llvm/CodeGen/PseudoSourceValue.h
>> @@ -22,6 +22,7 @@ namespace llvm {
>> 
>> class MachineFrameInfo;
>> class MachineMemOperand;
>> +class MIRFormatter;
>> class raw_ostream;
>> class TargetInstrInfo;
>> 
>> @@ -52,6 +53,7 @@ class PseudoSourceValue {
>>                                        const PseudoSourceValue* PSV);
>> 
>>   friend class MachineMemOperand; // For printCustom().
>> +  friend class MIRFormatter;      // For printCustom().
>> 
>>   /// Implement printing for PseudoSourceValue. This is called from
>>   /// Value::print or Value's operator<<.
>> 
>> diff  --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
>> index 4b4cea30b2ba..e410d1c4806d 100644
>> --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
>> +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
>> @@ -18,6 +18,7 @@
>> #include "llvm/ADT/DenseMapInfo.h"
>> #include "llvm/ADT/None.h"
>> #include "llvm/CodeGen/LiveRegUnits.h"
>> +#include "llvm/CodeGen/MIRFormatter.h"
>> #include "llvm/CodeGen/MachineBasicBlock.h"
>> #include "llvm/CodeGen/MachineCombinerPattern.h"
>> #include "llvm/CodeGen/MachineFunction.h"
>> @@ -1807,6 +1808,7 @@ class TargetInstrInfo : public MCInstrInfo {
>>                                                          Register Reg) const;
>> 
>> private:
>> +  mutable std::unique_ptr<MIRFormatter> Formatter;
>>   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
>>   unsigned CatchRetOpcode;
>>   unsigned ReturnOpcode;
>> 
>> diff  --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
>> index 176ae39b17a7..39422ac3bf8c 100644
>> --- a/llvm/include/llvm/Target/TargetMachine.h
>> +++ b/llvm/include/llvm/Target/TargetMachine.h
>> @@ -33,6 +33,7 @@ class MCInstrInfo;
>> class MCRegisterInfo;
>> class MCSubtargetInfo;
>> class MCSymbol;
>> +class MIRFormatter;
>> class raw_pwrite_stream;
>> class PassManagerBuilder;
>> struct PerFunctionMIParsingState;
>> @@ -94,6 +95,7 @@ class TargetMachine {
>>   std::unique_ptr<const MCRegisterInfo> MRI;
>>   std::unique_ptr<const MCInstrInfo> MII;
>>   std::unique_ptr<const MCSubtargetInfo> STI;
>> +  std::unique_ptr<const MIRFormatter> MIRF;
>> 
>>   unsigned RequireStructuredCFG : 1;
>>   unsigned O0WantsFastISel : 1;
>> @@ -197,6 +199,10 @@ class TargetMachine {
>>     return nullptr;
>>   }
>> 
>> +  /// Return MIR formatter to format/parse MIR operands.  Target can override
>> +  /// this virtual function and return target specific MIR formatter.
>> +  virtual const MIRFormatter *getMIRFormatter() const { return MIRF.get(); }
>> +
>>   bool requiresStructuredCFG() const { return RequireStructuredCFG; }
>>   void setRequiresStructuredCFG(bool Value) { RequireStructuredCFG = Value; }
>> 
>> 
>> diff  --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
>> index 21511586ff18..0c35a91f8282 100644
>> --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
>> +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
>> @@ -242,6 +242,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
>>       .Case("jump-table", MIToken::kw_jump_table)
>>       .Case("constant-pool", MIToken::kw_constant_pool)
>>       .Case("call-entry", MIToken::kw_call_entry)
>> +      .Case("custom", MIToken::kw_custom)
>>       .Case("liveout", MIToken::kw_liveout)
>>       .Case("address-taken", MIToken::kw_address_taken)
>>       .Case("landing-pad", MIToken::kw_landing_pad)
>> 
>> diff  --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
>> index 1e2eba91ceb5..af5327cacfea 100644
>> --- a/llvm/lib/CodeGen/MIRParser/MILexer.h
>> +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
>> @@ -110,6 +110,7 @@ struct MIToken {
>>     kw_jump_table,
>>     kw_constant_pool,
>>     kw_call_entry,
>> +    kw_custom,
>>     kw_liveout,
>>     kw_address_taken,
>>     kw_landing_pad,
>> 
>> diff  --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
>> index 525c70016a0f..0f2648e2bfac 100644
>> --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
>> +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
>> @@ -28,6 +28,7 @@
>> #include "llvm/AsmParser/SlotMapping.h"
>> #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
>> #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
>> +#include "llvm/CodeGen/MIRFormatter.h"
>> #include "llvm/CodeGen/MIRPrinter.h"
>> #include "llvm/CodeGen/MachineBasicBlock.h"
>> #include "llvm/CodeGen/MachineFrameInfo.h"
>> @@ -343,6 +344,37 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) {
>>   return *I.first->second;
>> }
>> 
>> +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
>> +                           DenseMap<unsigned, const Value *> &Slots2Values) {
>> +  int Slot = MST.getLocalSlot(V);
>> +  if (Slot == -1)
>> +    return;
>> +  Slots2Values.insert(std::make_pair(unsigned(Slot), V));
>> +}
>> +
>> +/// Creates the mapping from slot numbers to function's unnamed IR values.
>> +static void initSlots2Values(const Function &F,
>> +                             DenseMap<unsigned, const Value *> &Slots2Values) {
>> +  ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
>> +  MST.incorporateFunction(F);
>> +  for (const auto &Arg : F.args())
>> +    mapValueToSlot(&Arg, MST, Slots2Values);
>> +  for (const auto &BB : F) {
>> +    mapValueToSlot(&BB, MST, Slots2Values);
>> +    for (const auto &I : BB)
>> +      mapValueToSlot(&I, MST, Slots2Values);
>> +  }
>> +}
>> +
>> +const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) {
>> +  if (Slots2Values.empty())
>> +    initSlots2Values(MF.getFunction(), Slots2Values);
>> +  auto ValueInfo = Slots2Values.find(Slot);
>> +  if (ValueInfo == Slots2Values.end())
>> +    return nullptr;
>> +  return ValueInfo->second;
>> +}
>> +
>> namespace {
>> 
>> /// A wrapper struct around the 'MachineOperand' struct that includes a source
>> @@ -370,8 +402,6 @@ class MIParser {
>>   PerFunctionMIParsingState &PFS;
>>   /// Maps from slot numbers to function's unnamed basic blocks.
>>   DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
>> -  /// Maps from slot numbers to function's unnamed values.
>> -  DenseMap<unsigned, const Value *> Slots2Values;
>> 
>> public:
>>   MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
>> @@ -455,9 +485,12 @@ class MIParser {
>>   bool parseTargetIndexOperand(MachineOperand &Dest);
>>   bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
>>   bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
>> -  bool parseMachineOperand(MachineOperand &Dest,
>> +  bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
>> +                           MachineOperand &Dest,
>>                            Optional<unsigned> &TiedDefIdx);
>> -  bool parseMachineOperandAndTargetFlags(MachineOperand &Dest,
>> +  bool parseMachineOperandAndTargetFlags(const unsigned OpCode,
>> +                                         const unsigned OpIdx,
>> +                                         MachineOperand &Dest,
>>                                          Optional<unsigned> &TiedDefIdx);
>>   bool parseOffset(int64_t &Offset);
>>   bool parseAlignment(unsigned &Alignment);
>> @@ -473,6 +506,9 @@ class MIParser {
>>   bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
>>   bool parseHeapAllocMarker(MDNode *&Node);
>> 
>> +  bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
>> +                              MachineOperand &Dest, const MIRFormatter &MF);
>> +
>> private:
>>   /// Convert the integer literal in the current token into an unsigned integer.
>>   ///
>> @@ -551,6 +587,9 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
>>   return true;
>> }
>> 
>> +typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
>> +    ErrorCallbackType;
>> +
>> static const char *toString(MIToken::TokenKind TokenKind) {
>>   switch (TokenKind) {
>>   case MIToken::comma:
>> @@ -912,7 +951,7 @@ bool MIParser::parse(MachineInstr *&MI) {
>>          Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
>>     auto Loc = Token.location();
>>     Optional<unsigned> TiedDefIdx;
>> -    if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
>> +    if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
>>       return true;
>>     if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg())
>>       MO.setIsDebug();
>> @@ -1493,17 +1532,61 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
>>   return false;
>> }
>> 
>> -bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
>> -                               const Constant *&C) {
>> +bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
>> +                                      const unsigned OpIdx,
>> +                                      MachineOperand &Dest,
>> +                                      const MIRFormatter &MF) {
>> +  assert(Token.is(MIToken::dot));
>> +  auto Loc = Token.location(); // record start position
>> +  size_t Len = 1;              // for "."
>> +  lex();
>> +
>> +  // Handle the case where the mnemonic starts with a number.
>> +  if (Token.is(MIToken::IntegerLiteral)) {
>> +    Len += Token.range().size();
>> +    lex();
>> +  }
>> +
>> +  StringRef Src;
>> +  if (Token.is(MIToken::comma))
>> +    Src = StringRef(Loc, Len);
>> +  else {
>> +    assert(Token.is(MIToken::Identifier));
>> +    Src = StringRef(Loc, Len + Token.stringValue().size());
>> +  }
>> +  int64_t Val;
>> +  if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val,
>> +                          [this](StringRef::iterator Loc, const Twine &Msg)
>> +                              -> bool { return error(Loc, Msg); }))
>> +    return true;
>> +
>> +  Dest = MachineOperand::CreateImm(Val);
>> +  if (!Token.is(MIToken::comma))
>> +    lex();
>> +  return false;
>> +}
>> +
>> +static bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
>> +                            PerFunctionMIParsingState &PFS, const Constant *&C,
>> +                            ErrorCallbackType ErrCB) {
>>   auto Source = StringValue.str(); // The source has to be null terminated.
>>   SMDiagnostic Err;
>> -  C = parseConstantValue(Source, Err, *MF.getFunction().getParent(),
>> +  C = parseConstantValue(Source, Err, *PFS.MF.getFunction().getParent(),
>>                          &PFS.IRSlots);
>>   if (!C)
>> -    return error(Loc + Err.getColumnNo(), Err.getMessage());
>> +    return ErrCB(Loc + Err.getColumnNo(), Err.getMessage());
>>   return false;
>> }
>> 
>> +bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
>> +                               const Constant *&C) {
>> +  return ::parseIRConstant(
>> +      Loc, StringValue, PFS, C,
>> +      [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
>> +        return error(Loc, Msg);
>> +      });
>> +}
>> +
>> bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
>>   if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
>>     return true;
>> @@ -1636,27 +1719,52 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
>>   return false;
>> }
>> 
>> -bool MIParser::getUnsigned(unsigned &Result) {
>> +static bool getHexUint(const MIToken &Token, APInt &Result) {
>> +  assert(Token.is(MIToken::HexLiteral));
>> +  StringRef S = Token.range();
>> +  assert(S[0] == '0' && tolower(S[1]) == 'x');
>> +  // This could be a floating point literal with a special prefix.
>> +  if (!isxdigit(S[2]))
>> +    return true;
>> +  StringRef V = S.substr(2);
>> +  APInt A(V.size()*4, V, 16);
>> +
>> +  // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make
>> +  // sure it isn't the case before constructing result.
>> +  unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
>> +  Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
>> +  return false;
>> +}
>> +
>> +bool getUnsigned(const MIToken &Token, unsigned &Result,
>> +                 ErrorCallbackType ErrCB) {
>>   if (Token.hasIntegerValue()) {
>>     const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
>>     uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
>>     if (Val64 == Limit)
>> -      return error("expected 32-bit integer (too large)");
>> +      return ErrCB(Token.location(), "expected 32-bit integer (too large)");
>>     Result = Val64;
>>     return false;
>>   }
>>   if (Token.is(MIToken::HexLiteral)) {
>>     APInt A;
>> -    if (getHexUint(A))
>> +    if (getHexUint(Token, A))
>>       return true;
>>     if (A.getBitWidth() > 32)
>> -      return error("expected 32-bit integer (too large)");
>> +      return ErrCB(Token.location(), "expected 32-bit integer (too large)");
>>     Result = A.getZExtValue();
>>     return false;
>>   }
>>   return true;
>> }
>> 
>> +bool MIParser::getUnsigned(unsigned &Result) {
>> +  return ::getUnsigned(
>> +      Token, Result, [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
>> +        return error(Loc, Msg);
>> +      });
>> +}
>> +
>> bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
>>   assert(Token.is(MIToken::MachineBasicBlock) ||
>>          Token.is(MIToken::MachineBasicBlockLabel));
>> @@ -1736,23 +1844,25 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
>>   return false;
>> }
>> 
>> -bool MIParser::parseGlobalValue(GlobalValue *&GV) {
>> +static bool parseGlobalValue(const MIToken &Token,
>> +                             PerFunctionMIParsingState &PFS, GlobalValue *&GV,
>> +                             ErrorCallbackType ErrCB) {
>>   switch (Token.kind()) {
>>   case MIToken::NamedGlobalValue: {
>> -    const Module *M = MF.getFunction().getParent();
>> +    const Module *M = PFS.MF.getFunction().getParent();
>>     GV = M->getNamedValue(Token.stringValue());
>>     if (!GV)
>> -      return error(Twine("use of undefined global value '") + Token.range() +
>> -                   "'");
>> +      return ErrCB(Token.location(), Twine("use of undefined global value '") +
>> +                                         Token.range() + "'");
>>     break;
>>   }
>>   case MIToken::GlobalValue: {
>>     unsigned GVIdx;
>> -    if (getUnsigned(GVIdx))
>> +    if (getUnsigned(Token, GVIdx, ErrCB))
>>       return true;
>>     if (GVIdx >= PFS.IRSlots.GlobalValues.size())
>> -      return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
>> -                   "'");
>> +      return ErrCB(Token.location(), Twine("use of undefined global value '@") +
>> +                                         Twine(GVIdx) + "'");
>>     GV = PFS.IRSlots.GlobalValues[GVIdx];
>>     break;
>>   }
>> @@ -1762,6 +1872,14 @@ bool MIParser::parseGlobalValue(GlobalValue *&GV) {
>>   return false;
>> }
>> 
>> +bool MIParser::parseGlobalValue(GlobalValue *&GV) {
>> +  return ::parseGlobalValue(
>> +      Token, PFS, GV,
>> +      [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
>> +        return error(Loc, Msg);
>> +      });
>> +}
>> +
>> bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
>>   GlobalValue *GV = nullptr;
>>   if (parseGlobalValue(GV))
>> @@ -2410,7 +2528,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
>>   return false;
>> }
>> 
>> -bool MIParser::parseMachineOperand(MachineOperand &Dest,
>> +bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
>> +                                   MachineOperand &Dest,
>>                                    Optional<unsigned> &TiedDefIdx) {
>>   switch (Token.kind()) {
>>   case MIToken::kw_implicit:
>> @@ -2499,6 +2618,12 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
>>       return parseCustomRegisterMaskOperand(Dest);
>>     } else
>>       return parseTypedImmediateOperand(Dest);
>> +  case MIToken::dot: {
>> +    if (const auto *Formatter = MF.getTarget().getMIRFormatter()) {
>> +      return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter);
>> +    }
>> +    LLVM_FALLTHROUGH;
>> +  }
>>   default:
>>     // FIXME: Parse the MCSymbol machine operand.
>>     return error("expected a machine operand");
>> @@ -2507,7 +2632,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
>> }
>> 
>> bool MIParser::parseMachineOperandAndTargetFlags(
>> -    MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) {
>> +    const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest,
>> +    Optional<unsigned> &TiedDefIdx) {
>>   unsigned TF = 0;
>>   bool HasTargetFlags = false;
>>   if (Token.is(MIToken::kw_target_flags)) {
>> @@ -2539,7 +2665,7 @@ bool MIParser::parseMachineOperandAndTargetFlags(
>>       return true;
>>   }
>>   auto Loc = Token.location();
>> -  if (parseMachineOperand(Dest, TiedDefIdx))
>> +  if (parseMachineOperand(OpCode, OpIdx, Dest, TiedDefIdx))
>>     return true;
>>   if (!HasTargetFlags)
>>     return false;
>> @@ -2600,30 +2726,31 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) {
>>   return false;
>> }
>> 
>> -bool MIParser::parseIRValue(const Value *&V) {
>> +static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS,
>> +                         const Value *&V, ErrorCallbackType ErrCB) {
>>   switch (Token.kind()) {
>>   case MIToken::NamedIRValue: {
>> -    V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
>> +    V = PFS.MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
>>     break;
>>   }
>>   case MIToken::IRValue: {
>>     unsigned SlotNumber = 0;
>> -    if (getUnsigned(SlotNumber))
>> +    if (getUnsigned(Token, SlotNumber, ErrCB))
>>       return true;
>> -    V = getIRValue(SlotNumber);
>> +    V = PFS.getIRValue(SlotNumber);
>>     break;
>>   }
>>   case MIToken::NamedGlobalValue:
>>   case MIToken::GlobalValue: {
>>     GlobalValue *GV = nullptr;
>> -    if (parseGlobalValue(GV))
>> +    if (parseGlobalValue(Token, PFS, GV, ErrCB))
>>       return true;
>>     V = GV;
>>     break;
>>   }
>>   case MIToken::QuotedIRValue: {
>>     const Constant *C = nullptr;
>> -    if (parseIRConstant(Token.location(), Token.stringValue(), C))
>> +    if (parseIRConstant(Token.location(), Token.stringValue(), PFS, C, ErrCB))
>>       return true;
>>     V = C;
>>     break;
>> @@ -2632,10 +2759,17 @@ bool MIParser::parseIRValue(const Value *&V) {
>>     llvm_unreachable("The current token should be an IR block reference");
>>   }
>>   if (!V)
>> -    return error(Twine("use of undefined IR value '") + Token.range() + "'");
>> +    return ErrCB(Token.location(), Twine("use of undefined IR value '") + Token.range() + "'");
>>   return false;
>> }
>> 
>> +bool MIParser::parseIRValue(const Value *&V) {
>> +  return ::parseIRValue(
>> +      Token, PFS, V, [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
>> +        return error(Loc, Msg);
>> +      });
>> +}
>> +
>> bool MIParser::getUint64(uint64_t &Result) {
>>   if (Token.hasIntegerValue()) {
>>     if (Token.integerValue().getActiveBits() > 64)
>> @@ -2656,20 +2790,7 @@ bool MIParser::getUint64(uint64_t &Result) {
>> }
>> 
>> bool MIParser::getHexUint(APInt &Result) {
>> -  assert(Token.is(MIToken::HexLiteral));
>> -  StringRef S = Token.range();
>> -  assert(S[0] == '0' && tolower(S[1]) == 'x');
>> -  // This could be a floating point literal with a special prefix.
>> -  if (!isxdigit(S[2]))
>> -    return true;
>> -  StringRef V = S.substr(2);
>> -  APInt A(V.size()*4, V, 16);
>> -
>> -  // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make
>> -  // sure it isn't the case before constructing result.
>> -  unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
>> -  Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
>> -  return false;
>> +  return ::getHexUint(Token, Result);
>> }
>> 
>> bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
>> @@ -2756,6 +2877,19 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
>>           "expected a global value or an external symbol after 'call-entry'");
>>     }
>>     break;
>> +  case MIToken::kw_custom: {
>> +    lex();
>> +    if (const auto *Formatter = MF.getTarget().getMIRFormatter()) {
>> +      if (Formatter->parseCustomPseudoSourceValue(
>> +              Token.stringValue(), MF, PFS, PSV,
>> +              [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
>> +                return error(Loc, Msg);
>> +              }))
>> +        return true;
>> +    } else
>> +      return error("unable to parse target custom pseudo source value");
>> +    break;
>> +  }
>>   default:
>>     llvm_unreachable("The current token should be pseudo source value");
>>   }
>> @@ -2767,7 +2901,7 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
>>   if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
>>       Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
>>       Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) ||
>> -      Token.is(MIToken::kw_call_entry)) {
>> +      Token.is(MIToken::kw_call_entry) || Token.is(MIToken::kw_custom)) {
>>     const PseudoSourceValue *PSV = nullptr;
>>     if (parseMemoryPseudoSourceValue(PSV))
>>       return true;
>> @@ -3018,35 +3152,8 @@ const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
>>   return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks);
>> }
>> 
>> -static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
>> -                           DenseMap<unsigned, const Value *> &Slots2Values) {
>> -  int Slot = MST.getLocalSlot(V);
>> -  if (Slot == -1)
>> -    return;
>> -  Slots2Values.insert(std::make_pair(unsigned(Slot), V));
>> -}
>> -
>> -/// Creates the mapping from slot numbers to function's unnamed IR values.
>> -static void initSlots2Values(const Function &F,
>> -                             DenseMap<unsigned, const Value *> &Slots2Values) {
>> -  ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
>> -  MST.incorporateFunction(F);
>> -  for (const auto &Arg : F.args())
>> -    mapValueToSlot(&Arg, MST, Slots2Values);
>> -  for (const auto &BB : F) {
>> -    mapValueToSlot(&BB, MST, Slots2Values);
>> -    for (const auto &I : BB)
>> -      mapValueToSlot(&I, MST, Slots2Values);
>> -  }
>> -}
>> -
>> const Value *MIParser::getIRValue(unsigned Slot) {
>> -  if (Slots2Values.empty())
>> -    initSlots2Values(MF.getFunction(), Slots2Values);
>> -  auto ValueInfo = Slots2Values.find(Slot);
>> -  if (ValueInfo == Slots2Values.end())
>> -    return nullptr;
>> -  return ValueInfo->second;
>> +  return PFS.getIRValue(Slot);
>> }
>> 
>> MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
>> @@ -3111,3 +3218,15 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS,
>>                        MDNode *&Node, StringRef Src, SMDiagnostic &Error) {
>>   return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
>> }
>> +
>> +bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF,
>> +                                PerFunctionMIParsingState &PFS, const Value *&V,
>> +                                ErrorCallbackType ErrorCallback) {
>> +  MIToken Token;
>> +  Src = lexMIToken(Src, Token, [&](StringRef::iterator Loc, const Twine &Msg) {
>> +    ErrorCallback(Loc, Msg);
>> +  });
>> +  V = nullptr;
>> +
>> +  return ::parseIRValue(Token, PFS, V, ErrorCallback);
>> +}
>> 
>> diff  --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
>> index b06e34a809fc..9d9c12a95918 100644
>> --- a/llvm/lib/CodeGen/MIRPrinter.cpp
>> +++ b/llvm/lib/CodeGen/MIRPrinter.cpp
>> @@ -709,6 +709,7 @@ void MIPrinter::print(const MachineInstr &MI) {
>>   const auto *TRI = SubTarget.getRegisterInfo();
>>   assert(TRI && "Expected target register info");
>>   const auto *TII = SubTarget.getInstrInfo();
>> +  const auto *MIRF = MF->getTarget().getMIRFormatter();
>>   assert(TII && "Expected target instruction info");
>>   if (MI.isCFIInstruction())
>>     assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
>> @@ -807,7 +808,7 @@ void MIPrinter::print(const MachineInstr &MI) {
>>     for (const auto *Op : MI.memoperands()) {
>>       if (NeedComma)
>>         OS << ", ";
>> -      Op->print(OS, MST, SSNs, Context, &MFI, TII);
>> +      Op->print(OS, MST, SSNs, Context, &MFI, TII, MIRF);
>>       NeedComma = true;
>>     }
>>   }
>> @@ -856,7 +857,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
>>     if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
>>       TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
>>     const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
>> -    Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false,
>> +    Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false,
>>              ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII);
>>     break;
>>   }
>> @@ -874,6 +875,28 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
>>   }
>> }
>> 
>> +void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V,
>> +                                ModuleSlotTracker &MST) {
>> +  if (isa<GlobalValue>(V)) {
>> +    V.printAsOperand(OS, /*PrintType=*/false, MST);
>> +    return;
>> +  }
>> +  if (isa<Constant>(V)) {
>> +    // Machine memory operands can load/store to/from constant value pointers.
>> +    OS << '`';
>> +    V.printAsOperand(OS, /*PrintType=*/true, MST);
>> +    OS << '`';
>> +    return;
>> +  }
>> +  OS << "%ir.";
>> +  if (V.hasName()) {
>> +    printLLVMNameWithoutPrefix(OS, V.getName());
>> +    return;
>> +  }
>> +  int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
>> +  MachineOperand::printIRSlotNumber(OS, Slot);
>> +}
>> +
>> void llvm::printMIR(raw_ostream &OS, const Module &M) {
>>   yaml::Output Out(OS);
>>   Out << const_cast<Module &>(M);
>> 
>> diff  --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
>> index 16ae732169a4..177fef80e2e6 100644
>> --- a/llvm/lib/CodeGen/MachineInstr.cpp
>> +++ b/llvm/lib/CodeGen/MachineInstr.cpp
>> @@ -89,13 +89,15 @@ static void tryToGetTargetInfo(const MachineInstr &MI,
>>                                const TargetRegisterInfo *&TRI,
>>                                const MachineRegisterInfo *&MRI,
>>                                const TargetIntrinsicInfo *&IntrinsicInfo,
>> -                               const TargetInstrInfo *&TII) {
>> +                               const TargetInstrInfo *&TII,
>> +                               const MIRFormatter *&MIRF) {
>> 
>>   if (const MachineFunction *MF = getMFIfAvailable(MI)) {
>>     TRI = MF->getSubtarget().getRegisterInfo();
>>     MRI = &MF->getRegInfo();
>>     IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
>>     TII = MF->getSubtarget().getInstrInfo();
>> +    MIRF = MF->getTarget().getMIRFormatter();
>>   }
>> }
>> 
>> @@ -1477,7 +1479,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>   const TargetRegisterInfo *TRI = nullptr;
>>   const MachineRegisterInfo *MRI = nullptr;
>>   const TargetIntrinsicInfo *IntrinsicInfo = nullptr;
>> -  tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII);
>> +  const MIRFormatter *MIRF = nullptr;
>> +  tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII, MIRF);
>> 
>>   if (isCFIInstruction())
>>     assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
>> @@ -1506,7 +1509,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
>> 
>>     LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};
>>     unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
>> -    MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone,
>> +    MO.print(OS, MST, TypeToPrint, StartOp, /*PrintDef=*/false, IsStandalone,
>>              ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
>>     ++StartOp;
>>   }
>> @@ -1561,7 +1564,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>     const unsigned OpIdx = InlineAsm::MIOp_AsmString;
>>     LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};
>>     unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx);
>> -    getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
>> +    getOperand(OpIdx).print(OS, MST, TypeToPrint, OpIdx, /*PrintDef=*/true, IsStandalone,
>>                             ShouldPrintRegisterTies, TiedOperandIdx, TRI,
>>                             IntrinsicInfo);
>> 
>> @@ -1600,7 +1603,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>       else {
>>         LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
>>         unsigned TiedOperandIdx = getTiedOperandIdx(i);
>> -        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
>> +        MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
>>                  ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
>>       }
>>     } else if (isDebugLabel() && MO.isMetadata()) {
>> @@ -1611,7 +1614,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>       else {
>>         LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
>>         unsigned TiedOperandIdx = getTiedOperandIdx(i);
>> -        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
>> +        MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
>>                  ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
>>       }
>>     } else if (i == AsmDescOp && MO.isImm()) {
>> @@ -1678,7 +1681,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>       if (MO.isImm() && isOperandSubregIdx(i))
>>         MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI);
>>       else
>> -        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
>> +        MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
>>                  ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
>>     }
>>   }
>> @@ -1737,7 +1740,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>     for (const MachineMemOperand *Op : memoperands()) {
>>       if (NeedComma)
>>         OS << ", ";
>> -      Op->print(OS, MST, SSNs, *Context, MFI, TII);
>> +      Op->print(OS, MST, SSNs, *Context, MFI, TII, MIRF);
>>       NeedComma = true;
>>     }
>>   }
>> 
>> diff  --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
>> index 8b19501ec3cf..5dd98467ba66 100644
>> --- a/llvm/lib/CodeGen/MachineOperand.cpp
>> +++ b/llvm/lib/CodeGen/MachineOperand.cpp
>> @@ -14,6 +14,7 @@
>> #include "llvm/ADT/StringExtras.h"
>> #include "llvm/Analysis/Loads.h"
>> #include "llvm/Analysis/MemoryLocation.h"
>> +#include "llvm/CodeGen/MIRFormatter.h"
>> #include "llvm/CodeGen/MIRPrinter.h"
>> #include "llvm/CodeGen/MachineFrameInfo.h"
>> #include "llvm/CodeGen/MachineJumpTableInfo.h"
>> @@ -458,28 +459,6 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
>>     OS << "<unknown>";
>> }
>> 
>> -static void printIRValueReference(raw_ostream &OS, const Value &V,
>> -                                  ModuleSlotTracker &MST) {
>> -  if (isa<GlobalValue>(V)) {
>> -    V.printAsOperand(OS, /*PrintType=*/false, MST);
>> -    return;
>> -  }
>> -  if (isa<Constant>(V)) {
>> -    // Machine memory operands can load/store to/from constant value pointers.
>> -    OS << '`';
>> -    V.printAsOperand(OS, /*PrintType=*/true, MST);
>> -    OS << '`';
>> -    return;
>> -  }
>> -  OS << "%ir.";
>> -  if (V.hasName()) {
>> -    printLLVMNameWithoutPrefix(OS, V.getName());
>> -    return;
>> -  }
>> -  int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
>> -  MachineOperand::printIRSlotNumber(OS, Slot);
>> -}
>> -
>> static void printSyncScope(raw_ostream &OS, const LLVMContext &Context,
>>                            SyncScope::ID SSID,
>>                            SmallVectorImpl<StringRef> &SSNs) {
>> @@ -734,14 +713,15 @@ void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint,
>>                            const TargetIntrinsicInfo *IntrinsicInfo) const {
>>   tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
>>   ModuleSlotTracker DummyMST(nullptr);
>> -  print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true,
>> +  print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false,
>> +        /*IsStandalone=*/true,
>>         /*ShouldPrintRegisterTies=*/true,
>>         /*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
>> }
>> 
>> void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
>> -                           LLT TypeToPrint, bool PrintDef, bool IsStandalone,
>> -                           bool ShouldPrintRegisterTies,
>> +                           LLT TypeToPrint, Optional<unsigned> OpIdx, bool PrintDef,
>> +                           bool IsStandalone, bool ShouldPrintRegisterTies,
>>                            unsigned TiedOperandIdx,
>>                            const TargetRegisterInfo *TRI,
>>                            const TargetIntrinsicInfo *IntrinsicInfo) const {
>> @@ -802,9 +782,16 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>       OS << '(' << TypeToPrint << ')';
>>     break;
>>   }
>> -  case MachineOperand::MO_Immediate:
>> -    OS << getImm();
>> +  case MachineOperand::MO_Immediate: {
>> +    const MIRFormatter *Formatter = nullptr;
>> +    if (const MachineFunction *MF = getMFIfAvailable(*this))
>> +      Formatter = MF->getTarget().getMIRFormatter();
>> +    if (Formatter)
>> +      Formatter->printImm(OS, *getParent(), OpIdx, getImm());
>> +    else
>> +      OS << getImm();
>>     break;
>> +  }
>>   case MachineOperand::MO_CImmediate:
>>     getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
>>     break;
>> @@ -1070,7 +1057,8 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>                               SmallVectorImpl<StringRef> &SSNs,
>>                               const LLVMContext &Context,
>>                               const MachineFrameInfo *MFI,
>> -                              const TargetInstrInfo *TII) const {
>> +                              const TargetInstrInfo *TII,
>> +                              const MIRFormatter* MIRF) const {
>>   OS << '(';
>>   if (isVolatile())
>>     OS << "volatile ";
>> @@ -1111,7 +1099,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
>> 
>>   if (const Value *Val = getValue()) {
>>     OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
>> -    printIRValueReference(OS, *Val, MST);
>> +    MIRFormatter::printIRValue(OS, *Val, MST);
>>   } else if (const PseudoSourceValue *PVal = getPseudoValue()) {
>>     OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
>>     assert(PVal && "Expected a pseudo source value");
>> @@ -1144,15 +1132,20 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
>>       printLLVMNameWithoutPrefix(
>>           OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
>>       break;
>> -    default:
>> +    default: {
>>       // FIXME: This is not necessarily the correct MIR serialization format for
>>       // a custom pseudo source value, but at least it allows
>>       // -print-machineinstrs to work on a target with custom pseudo source
>>       // values.
>> -      OS << "custom ";
>> -      PVal->printCustom(OS);
>> +      OS << "custom \"";
>> +      if (MIRF)
>> +        MIRF->printCustomPseudoSourceValue(OS, MST, *PVal);
>> +      else
>> +        PVal->printCustom(OS);
>> +      OS << '\"';
>>       break;
>>     }
>> +    }
>>   }
>>   MachineOperand::printOperandOffset(OS, getOffset());
>>   if (getBaseAlignment() != getSize())
>> 
>> diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
>> index 6fd71393bf38..cbdcb93e60c9 100644
>> --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
>> +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
>> @@ -480,7 +480,8 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
>>   if (MF)
>>     MST.incorporateFunction(MF->getFunction());
>>   SmallVector<StringRef, 0> SSNs;
>> -  MMO.print(OS, MST, SSNs, Ctx, MFI, TII);
>> +  MMO.print(OS, MST, SSNs, Ctx, MFI, TII,
>> +            MF ? MF->getTarget().getMIRFormatter() : nullptr);
>> }
>> 
>> static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
>> 
>> diff  --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
>> index 97a1eb2f190a..f070b143d5b4 100644
>> --- a/llvm/lib/Target/TargetMachine.cpp
>> +++ b/llvm/lib/Target/TargetMachine.cpp
>> @@ -12,6 +12,7 @@
>> 
>> #include "llvm/Target/TargetMachine.h"
>> #include "llvm/Analysis/TargetTransformInfo.h"
>> +#include "llvm/CodeGen/MIRFormatter.h"
> 
> There is a cyclic dependency here: lib/CodeGen depends on lib/Target,
> which now depends on lib/CodeGen. Can the formatter pointer be stored
> somewhere else?

+Peng

>> #include "llvm/IR/Function.h"
>> #include "llvm/IR/GlobalAlias.h"
>> #include "llvm/IR/GlobalValue.h"
>> @@ -37,7 +38,9 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
>>     : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
>>       TargetFS(FS), AsmInfo(nullptr), MRI(nullptr), MII(nullptr), STI(nullptr),
>>       RequireStructuredCFG(false), O0WantsFastISel(false),
>> -      DefaultOptions(Options), Options(Options) {}
>> +      DefaultOptions(Options), Options(Options) {
>> +  MIRF = std::make_unique<MIRFormatter>();
>> +}
>> 
>> TargetMachine::~TargetMachine() = default;
>> 
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
>> index dd4f892ebc23..ce71a89adacb 100644
>> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
>> @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; UNPACKED:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -27,7 +27,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; PACKED:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -44,7 +44,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
>>   ; UNPACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
>>   ; UNPACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -56,7 +56,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
>>   ; PACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
>>   ; PACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
>>   ret void
>> @@ -78,7 +78,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; UNPACKED:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
>>   ; UNPACKED:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -91,7 +91,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; PACKED:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -116,7 +116,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; UNPACKED:   [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
>>   ; UNPACKED:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -131,7 +131,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; PACKED:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
>>   ; UNPACKED:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; UNPACKED:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; UNPACKED:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; UNPACKED:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; UNPACKED:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -211,7 +211,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
>>   ; PACKED:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; PACKED:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; PACKED:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; PACKED:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; PACKED:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -240,7 +240,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
>>   ; UNPACKED:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
>>   ; UNPACKED:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -253,7 +253,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; PACKED:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
>>   ret void
>> @@ -275,7 +275,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
>>   ; UNPACKED:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
>>   ; UNPACKED:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; PACKED:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
>>   ret void
>> @@ -310,7 +310,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; UNPACKED:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
>>   ; UNPACKED:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 16, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -323,7 +323,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; PACKED:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 16, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 16
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -346,7 +346,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; UNPACKED:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
>>   ; UNPACKED:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -359,7 +359,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; PACKED:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 4095
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -384,7 +384,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; UNPACKED:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
>>   ; UNPACKED:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %23, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %23, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; UNPACKED:   S_ENDPGM 0
>>   ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
>>   ; PACKED: bb.1 (%ir-block.0):
>> @@ -399,7 +399,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; PACKED:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
>>   ; PACKED:   %14:vgpr_32, dead %15:sreg_64 = V_ADD_I32_e64 [[COPY5]], killed [[V_MOV_B32_e32_]], 0, implicit $exec
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; PACKED:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 4096
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -445,7 +445,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; UNPACKED:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
>>   ; UNPACKED:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; UNPACKED:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %48, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; UNPACKED:   BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %48, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; UNPACKED:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; UNPACKED:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; UNPACKED:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -485,7 +485,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; PACKED:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
>>   ; PACKED:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; PACKED:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; PACKED:   BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %32, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; PACKED:   BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %32, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; PACKED:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; PACKED:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; PACKED:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
>> index 75d25b0c2c46..aea37fd08b40 100644
>> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
>> @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
>>   ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
>>   ; CHECK:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
>>   ret void
>> @@ -51,7 +51,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -72,7 +72,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -94,7 +94,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -132,7 +132,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -159,7 +159,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>>   ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
>>   ret void
>> @@ -179,7 +179,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>>   ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
>>   ret void
>> @@ -199,7 +199,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 16, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 16, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 16
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -220,7 +220,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 4095
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -243,7 +243,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>>   ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
>>   ; CHECK:   %16:vgpr_32, dead %17:sreg_64 = V_ADD_I32_e64 [[COPY6]], killed [[V_MOV_B32_e32_]], 0, implicit $exec
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %16, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %16, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 4096
>>   call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -286,7 +286,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %34, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %34, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
>> index 4db5fe081fda..c5aa36df8675 100644
>> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
>> @@ -15,7 +15,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -36,7 +36,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
>>   ; CHECK:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -71,7 +71,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -103,7 +103,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
>>   ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
>>   ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
>>   ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -148,7 +148,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
>>   ; CHECK:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
>>   ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
>>   ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
>>   ret void
>> @@ -191,7 +191,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
>>   ret void
>> @@ -209,7 +209,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
>>   ret void
>> @@ -227,7 +227,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
>>   ret void
>> @@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
>>   ret void
>> @@ -263,7 +263,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
>>   ret void
>> @@ -281,7 +281,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
>>   ret void
>> @@ -301,7 +301,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>> -  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
>> -  ; CHECK:   BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -344,7 +344,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -362,7 +362,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "TargetCustom7", addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %val.trunc = trunc i32 %val to i8
>>   call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>> @@ -381,7 +381,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %val.trunc = trunc i32 %val to i16
>>   call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>> @@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -418,7 +418,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -438,7 +438,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
>> -  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -474,7 +474,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -498,7 +498,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
>>   ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
>>   ; CHECK:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
>>   ret void
>> @@ -516,7 +516,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
>>   ; CHECK:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
>>   ret void
>> @@ -534,7 +534,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 16, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 16
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -553,7 +553,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 4095
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -574,7 +574,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
>>   ; CHECK:   %14:vgpr_32, dead %15:sreg_64 = V_ADD_I32_e64 [[COPY5]], killed [[V_MOV_B32_e32_]], 0, implicit $exec
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 4096
>>   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -593,7 +593,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
>>   ret void
>> @@ -611,7 +611,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
>>   ret void
>> @@ -629,7 +629,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 16, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 16
>>   call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -648,7 +648,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 4095
>>   call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -669,7 +669,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
>>   ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
>>   ; CHECK:   %14:vgpr_32, dead %15:sreg_64 = V_ADD_I32_e64 [[COPY5]], killed [[V_MOV_B32_e32_]], 0, implicit $exec
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   %voffset.add = add i32 %voffset, 4096
>>   call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
>> @@ -707,7 +707,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %30, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %30, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 5000, align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -750,7 +750,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
>> -  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
>> +  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 5000, align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
>> index 44a17012237e..d4a3f4025b37 100644
>> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
>> @@ -18,7 +18,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32
>>   ; CHECK:   [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
>>   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
>>   ; CHECK:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
>>   ; CHECK:   G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
>>   ; CHECK:   S_ENDPGM 0
>> @@ -44,7 +44,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32
>>   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
>>   ; CHECK:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
>>   ; CHECK:   [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
>>   ; CHECK:   G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
>>   ; CHECK:   S_ENDPGM 0
>> @@ -98,7 +98,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
>>   ; CHECK:   [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
>>   ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -160,7 +160,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
>>   ; CHECK:   [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
>>   ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
>> index c59372a8d09c..e5d67a3f8874 100644
>> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
>> @@ -23,7 +23,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre
>>   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
>>   ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
>>   ; CHECK:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
>>   ; CHECK:   G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
>>   ; CHECK:   S_ENDPGM 0
>> @@ -54,7 +54,7 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre
>>   ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
>>   ; CHECK:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
>>   ; CHECK:   [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[COPY14:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
>>   ; CHECK:   G_STORE [[INT]](<4 x s32>), [[COPY14]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
>>   ; CHECK:   S_ENDPGM 0
>> @@ -113,7 +113,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
>>   ; CHECK:   [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
>>   ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -169,7 +169,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -247,7 +247,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
>>   ; CHECK:   [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
>>   ; CHECK:   [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
>> index 33a8e9a1284c..4443daba2ee2 100644
>> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
>> @@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
>>   ; CHECK:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
>>   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   $vgpr0 = COPY [[INT]](s32)
>>   ; CHECK:   SI_RETURN_TO_EPILOG implicit $vgpr0
>>   %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>> @@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr
>>   ; CHECK:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
>>   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
>>   ; CHECK:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   $vgpr0 = COPY [[INT]](s32)
>>   ; CHECK:   SI_RETURN_TO_EPILOG implicit $vgpr0
>>   %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
>> @@ -72,7 +72,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -108,7 +108,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
>>   ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
>>   ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
>>   ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -156,7 +156,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
>>   ; CHECK:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
>>   ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec
>>   ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
>> index 9bc81aecc8a1..a657488278b0 100644
>> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
>> @@ -15,7 +15,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vg
>>   ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
>>   ; CHECK:   [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
>>   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   $vgpr0 = COPY [[INT]](s32)
>>   ; CHECK:   SI_RETURN_TO_EPILOG implicit $vgpr0
>>   %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
>> @@ -37,7 +37,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sg
>>   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
>>   ; CHECK:   [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
>>   ; CHECK:   [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   $vgpr0 = COPY [[INT]](s32)
>>   ; CHECK:   SI_RETURN_TO_EPILOG implicit $vgpr0
>>   %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
>> @@ -76,7 +76,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -113,7 +113,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp
>>   ; CHECK:   [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
>>   ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
>>   ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -162,7 +162,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
>>   ; CHECK:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
>>   ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec
>>   ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
>> -  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
>> index efe81eabc349..a6ba559382f5 100644
>> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
>> @@ -16,7 +16,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg
>>   ; CHECK:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
>>   ; CHECK:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
>>   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
>> -  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -39,7 +39,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sg
>>   ; CHECK:   [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
>>   ; CHECK:   [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
>>   ; CHECK:   [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32)
>> -  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   S_ENDPGM 0
>>   call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
>>   ret void
>> @@ -76,7 +76,7 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
>>   ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
>>   ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
>>   ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
>> -  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -111,7 +111,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg
>>   ; CHECK:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
>>   ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
>>   ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec
>> -  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> @@ -158,7 +158,7 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
>>   ; CHECK:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
>>   ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec
>>   ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
>> -  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
>>   ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
>>   ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
>> index f96a13878ba6..ce62e041aa67 100644
>> --- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
>> @@ -10,218 +10,218 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
>>   ; GCN:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
>>   ; GCN:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
>>   ; GCN:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6)
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 16, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 16, align 1, addrspace 4)
>>   ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 32, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 32, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 48, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 48, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 64, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 64, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 80, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 80, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1
>>   ; GCN:   [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 96, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 96, align 1, addrspace 4)
>>   ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
>>   ; GCN:   [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub0
>>   ; GCN:   [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF2]].sub0
>>   ; GCN:   [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF3]].sub0
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
>> -  ; GCN:   BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom TargetCustom7 + 112, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (load store 4 on custom TargetCustom7, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom TargetCustom7, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom TargetCustom7, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7" + 112, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 128, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 128, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 128, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 128, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 128, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 128, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 144, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 144, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 144, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 144, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 144, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 144, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY7]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY7]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 160, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 160, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80
>> -  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 160, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 160, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 160
>> -  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 160, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 160, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY8]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY8]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 176, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 176, align 1, addrspace 4)
>>   ; GCN:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[DEF4]].sub0
>>   ; GCN:   [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88
>>   ; GCN:   [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 176, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 176, align 1, addrspace 4)
>>   ; GCN:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[DEF5]].sub0
>>   ; GCN:   [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176
>>   ; GCN:   [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 176, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 176, align 1, addrspace 4)
>>   ; GCN:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[DEF6]].sub0
>>   ; GCN:   [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[DEF7]].sub0
>>   ; GCN:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>>   ; GCN:   [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY13]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY13]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[DEF8]].sub0
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 192, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 192, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96
>> -  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 192, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 192, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192
>> -  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 192, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 192, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY15]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY15]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 208, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 208, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 208, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 208, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 208, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 208, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY16]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY16]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY17]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 224, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY17]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 224, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112
>>   ; GCN:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY18]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 224, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY18]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 224, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224
>>   ; GCN:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 224, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 224, align 1, addrspace 4)
>>   ; GCN:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY22]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 240, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY22]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 240, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120
>>   ; GCN:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY23]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 240, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY23]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 240, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240
>>   ; GCN:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY24]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 240, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY24]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 240, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[COPY26:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY25]], [[S_LOAD_DWORDX4_IMM]], [[COPY26]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY25]], [[S_LOAD_DWORDX4_IMM]], [[COPY26]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY27]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 256, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY27]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 256, align 1, addrspace 4)
>>   ; GCN:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 256, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 256, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256
>>   ; GCN:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 256, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 256, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[COPY31:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[COPY31]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[COPY31]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY32]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 272, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY32]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 272, align 1, addrspace 4)
>>   ; GCN:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[DEF9]].sub0
>>   ; GCN:   [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136
>>   ; GCN:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[DEF10:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 272, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 272, align 1, addrspace 4)
>>   ; GCN:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[DEF10]].sub0
>>   ; GCN:   [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272
>>   ; GCN:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[DEF11:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 272, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 272, align 1, addrspace 4)
>>   ; GCN:   [[COPY37:%[0-9]+]]:vgpr_32 = COPY [[DEF11]].sub0
>>   ; GCN:   [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY38:%[0-9]+]]:vgpr_32 = COPY [[DEF12]].sub0
>>   ; GCN:   [[COPY39:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[COPY40:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>>   ; GCN:   [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[DEF13]].sub0
>>   ; GCN:   [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY42:%[0-9]+]]:vgpr_32 = COPY [[DEF14]].sub0
>>   ; GCN:   [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[DEF15]].sub0
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 288, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 288, align 1, addrspace 4)
>>   ; GCN:   [[COPY45:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY45]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 288, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY45]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 288, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288
>>   ; GCN:   [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY46]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 288, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY46]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 288, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[COPY48:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[COPY48]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[COPY48]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   INLINEASM &"", 1
>>   ; GCN:   [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY49]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 304, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY49]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 304, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152
>>   ; GCN:   [[COPY50:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY50]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 304, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY50]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 304, align 1, addrspace 4)
>>   ; GCN:   [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304
>>   ; GCN:   [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY51]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 304, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY51]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 304, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
>>   ; GCN:   [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]]
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> -  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   S_ENDPGM 0
>> bb.0:
>>   %tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
>> index 0c264251942a..93322c7da4f8 100644
>> --- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
>> @@ -12,7 +12,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
>>   ; GCN:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
>>   ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
>>   ; GCN:   [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
>> -  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
>>   ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
>>   ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
>> @@ -21,7 +21,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
>>   ; GCN:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
>>   ; GCN:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
>>   ; GCN:   [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
>> -  ; GCN:   BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
>> +  ; GCN:   BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "TargetCustom7", align 1, addrspace 4)
>>   ; GCN:   S_ENDPGM 0
>> main_body:
>>   %tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0)
>> 
>> diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
>> index cdcf7383afc0..068e3d98f17b 100644
>> --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
>> +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
>> @@ -27,7 +27,7 @@
>> 
>> ; MIR-LABEL: name: gws_barrier_offset0{{$}}
>> ; MIR: BUNDLE implicit{{( killed)?( renamable)?}} $vgpr0, implicit $m0, implicit $exec {
>> -; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, -1, implicit $m0, implicit $exec :: (load 4 from custom GWSResource)
>> +; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, -1, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource")
>> ; MIR-NEXT: S_WAITCNT 0
>> ; MIR-NEXT: }
>> define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
>> 
>> diff  --git a/llvm/unittests/CodeGen/MachineOperandTest.cpp b/llvm/unittests/CodeGen/MachineOperandTest.cpp
>> index faa471f2260c..7e60fab28154 100644
>> --- a/llvm/unittests/CodeGen/MachineOperandTest.cpp
>> +++ b/llvm/unittests/CodeGen/MachineOperandTest.cpp
>> @@ -310,7 +310,7 @@ TEST(MachineOperandTest, PrintMetadata) {
>>   std::string str;
>>   // Print a MachineOperand containing a metadata node.
>>   raw_string_ostream OS(str);
>> -  MO.print(OS, MST, LLT{}, /*PrintDef=*/false, /*IsStandalone=*/false,
>> +  MO.print(OS, MST, LLT{}, /*OpIdx*/~0U, /*PrintDef=*/false, /*IsStandalone=*/false,
>>            /*ShouldPrintRegisterTies=*/false, 0, /*TRI=*/nullptr,
>>            /*IntrinsicInfo=*/nullptr);
>>   ASSERT_TRUE(OS.str() == "!0");
>> 
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200109/2d85f96d/attachment-0001.html>


More information about the llvm-commits mailing list