[llvm] r359621 - [AMDGPU] gfx1010 VMEM and SMEM implementation
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 30 15:08:23 PDT 2019
Author: rampitec
Date: Tue Apr 30 15:08:23 2019
New Revision: 359621
URL: http://llvm.org/viewvc/llvm-project?rev=359621&view=rev
Log:
[AMDGPU] gfx1010 VMEM and SMEM implementation
Differential Revision: https://reviews.llvm.org/D61330
Added:
llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
llvm/trunk/test/MC/AMDGPU/flat-gfx10.s
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SMInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir
llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir
llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir
llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir
llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir
llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir
llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir
llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir
llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir
llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir
llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir
llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir
llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir
llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir
llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll
llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir
llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir
llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir
llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir
llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir
llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir
llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir
llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir
llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir
llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir
llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir
llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll
llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir
llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir
llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
llvm/trunk/test/MC/AMDGPU/flat-global.s
llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Tue Apr 30 15:08:23 2019
@@ -1431,6 +1431,14 @@ def int_amdgcn_ds_bpermute :
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
//===----------------------------------------------------------------------===//
+// GFX10 Intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_amdgcn_s_get_waveid_in_workgroup :
+ GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
+ Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
+
+//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
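For reference, the new intrinsic is reachable from device code through the builtin named above. A minimal sketch of such a call (the wrapper function is illustrative only; the builtin name and its no-argument i32 result come from this patch, and the sketch assumes compilation with clang for a gfx1010 target):

  // Returns the id of the calling wave within its workgroup on gfx10.
  // Sketch only -- not part of this commit.
  static inline unsigned waveid_in_workgroup() {
    return __builtin_amdgcn_s_get_waveid_in_workgroup();
  }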
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Tue Apr 30 15:08:23 2019
@@ -125,10 +125,10 @@ private:
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const;
+ SDValue &TFE, SDValue &DLC) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE) const;
+ SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
@@ -141,19 +141,19 @@ private:
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const;
+ SDValue &TFE, SDValue &DLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &SLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
- bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
+ bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
- bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
+ bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
template <bool IsSigned>
- bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
+ bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
@@ -1221,7 +1221,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDV
SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64,
SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const {
+ SDValue &TFE, SDValue &DLC) const {
// Subtarget prefers to use flat instruction
if (Subtarget->useFlatForGlobal())
return false;
@@ -1233,6 +1233,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDV
if (!SLC.getNode())
SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1311,7 +1312,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDV
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE) const {
+ SDValue &SLC, SDValue &TFE,
+ SDValue &DLC) const {
SDValue Ptr, Offen, Idxen, Addr64;
// addr64 bit was removed for volcanic islands.
@@ -1319,7 +1321,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr
return false;
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE))
+ GLC, SLC, TFE, DLC))
return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1341,9 +1343,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr
SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
- SDValue GLC, TFE;
+ SDValue GLC, TFE, DLC;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
}
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1468,13 +1470,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScra
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const {
+ SDValue &TFE, SDValue &DLC) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE))
+ GLC, SLC, TFE, DLC))
return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1496,57 +1498,42 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffs
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset
) const {
- SDValue GLC, SLC, TFE;
+ SDValue GLC, SLC, TFE, DLC;
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &SLC) const {
- SDValue GLC, TFE;
+ SDValue GLC, TFE, DLC;
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}
template <bool IsSigned>
-bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
+ SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- int64_t OffsetVal = 0;
-
- if (Subtarget->hasFlatInstOffsets() &&
- CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
-
- if ((IsSigned && isInt<13>(COffsetVal)) ||
- (!IsSigned && isUInt<12>(COffsetVal))) {
- Addr = N0;
- OffsetVal = COffsetVal;
- }
- }
-
- VAddr = Addr;
- Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
- SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
-
- return true;
+ return static_cast<const SITargetLowering*>(getTargetLowering())->
+ SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
}
-bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
+ SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
+ return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}
-bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
+ SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
+ return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Tue Apr 30 15:08:23 2019
@@ -2886,6 +2886,61 @@ bool AMDGPUTargetLowering::shouldCombine
return true;
}
+// Find a load or store from corresponding pattern root.
+// Roots may be build_vector, bitconvert or their combinations.
+static MemSDNode* findMemSDNode(SDNode *N) {
+ N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
+ if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
+ return MN;
+ assert(isa<BuildVectorSDNode>(N));
+ for (SDValue V : N->op_values())
+ if (MemSDNode *MN =
+ dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
+ return MN;
+ llvm_unreachable("cannot find MemSDNode in the pattern!");
+}
+
+bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned,
+ SelectionDAG &DAG,
+ SDNode *N,
+ SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ const GCNSubtarget &ST =
+ DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
+ int64_t OffsetVal = 0;
+
+ if (ST.hasFlatInstOffsets() &&
+ (!ST.hasFlatSegmentOffsetBug() ||
+ findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
+ DAG.isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ if ((IsSigned && isInt<12>(COffsetVal)) ||
+ (!IsSigned && isUInt<11>(COffsetVal))) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ }
+ } else {
+ if ((IsSigned && isInt<13>(COffsetVal)) ||
+ (!IsSigned && isUInt<12>(COffsetVal))) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ }
+ }
+ }
+
+ VAddr = Addr;
+ Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+ SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1);
+
+ return true;
+}
+
// Replace load of an illegal type with a store of a bitcast to a friendlier
// type.
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
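The hunk above narrows the immediate offset that can be folded into FLAT addressing on GFX10: one bit less than on earlier subtargets with flat offsets, with the signed form reserved for the segment-specific (global/scratch) variants. A standalone restatement of that range check, for readability (plain C++ using the same bit widths as the code above; not the actual LLVM helpers):

  #include <cstdint>

  // Returns true if COffset fits the FLAT immediate offset field.
  // Mirrors SelectFlatOffset above:
  //   GFX10:     signed 12-bit, or unsigned 11-bit for flat-segment accesses
  //   pre-GFX10: signed 13-bit, or unsigned 12-bit
  static bool isLegalFlatImmOffset(int64_t COffset, bool IsSigned, bool IsGFX10) {
    auto fitsSigned = [](int64_t V, unsigned N) {
      return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
    };
    auto fitsUnsigned = [](int64_t V, unsigned N) {
      return V >= 0 && V < (INT64_C(1) << N);
    };
    if (IsGFX10)
      return IsSigned ? fitsSigned(COffset, 12) : fitsUnsigned(COffset, 11);
    return IsSigned ? fitsSigned(COffset, 13) : fitsUnsigned(COffset, 12);
  }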
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Tue Apr 30 15:08:23 2019
@@ -323,6 +323,10 @@ public:
}
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+
+ bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
+ SDValue Addr, SDValue &VAddr, SDValue &Offset,
+ SDValue &SLC) const;
};
namespace AMDGPUISD {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Tue Apr 30 15:08:23 2019
@@ -356,7 +356,8 @@ bool AMDGPUInstructionSelector::selectG_
.add(I.getOperand(0))
.addImm(0) // offset
.addImm(0) // glc
- .addImm(0); // slc
+ .addImm(0) // slc
+ .addImm(0); // dlc
// Now that we selected an opcode, we need to constrain the register
@@ -532,7 +533,8 @@ bool AMDGPUInstructionSelector::selectG_
.addReg(PtrReg)
.addImm(0) // offset
.addImm(0) // glc
- .addImm(0); // slc
+ .addImm(0) // slc
+ .addImm(0); // dlc
bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
I.eraseFromParent();
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Tue Apr 30 15:08:23 2019
@@ -139,6 +139,7 @@ public:
ImmTyInstOffset,
ImmTyOffset0,
ImmTyOffset1,
+ ImmTyDLC,
ImmTyGLC,
ImmTySLC,
ImmTyTFE,
@@ -314,6 +315,7 @@ public:
bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
+ bool isDLC() const { return isImmTy(ImmTyDLC); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
@@ -676,6 +678,7 @@ public:
case ImmTyInstOffset: OS << "InstOffset"; break;
case ImmTyOffset0: OS << "Offset0"; break;
case ImmTyOffset1: OS << "Offset1"; break;
+ case ImmTyDLC: OS << "DLC"; break;
case ImmTyGLC: OS << "GLC"; break;
case ImmTySLC: OS << "SLC"; break;
case ImmTyTFE: OS << "TFE"; break;
@@ -1184,6 +1187,7 @@ public:
void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
+ AMDGPUOperand::Ptr defaultDLC() const;
AMDGPUOperand::Ptr defaultGLC() const;
AMDGPUOperand::Ptr defaultSLC() const;
@@ -2303,13 +2307,26 @@ unsigned AMDGPUAsmParser::checkTargetMat
}
}
- if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
+ if (TSFlags & SIInstrFlags::FLAT) {
// FIXME: Produces error without correct column reported.
- auto OpNum =
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
+ auto Opcode = Inst.getOpcode();
+ auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
+
const auto &Op = Inst.getOperand(OpNum);
- if (Op.getImm() != 0)
+ if (!hasFlatOffsets() && Op.getImm() != 0)
return Match_InvalidOperand;
+
+ // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
+ // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
+ if (isGFX10()) {
+ if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
+ if (!isInt<12>(Op.getImm()))
+ return Match_InvalidOperand;
+ } else {
+ if (!isUInt<11>(Op.getImm()))
+ return Match_InvalidOperand;
+ }
+ }
}
return Match_Success;
@@ -3887,6 +3904,9 @@ AMDGPUAsmParser::parseNamedBit(const cha
}
}
+ if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
+ return MatchOperand_ParseFail;
+
Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
return MatchOperand_Success;
}
@@ -5101,6 +5121,10 @@ AMDGPUAsmParser::parseSOppBrTarget(Opera
// mubuf
//===----------------------------------------------------------------------===//
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
+}
+
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
@@ -5177,6 +5201,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCIns
if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
+
+ if (isGFX10())
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -5214,6 +5241,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &I
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+
+ if (isGFX10())
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
//===----------------------------------------------------------------------===//
@@ -5249,8 +5279,12 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &In
}
}
+ bool IsGFX10 = isGFX10();
+
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
+ if (IsGFX10)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
@@ -5353,6 +5387,7 @@ static const OptionalOperand AMDGPUOptio
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
+ {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
{"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
@@ -5581,7 +5616,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &In
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
}
- // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
+ // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
// it has src2 register operand that is tied to dst operand
// we don't allow modifiers for this operand in assembler so src2_modifiers
// should be 0.
@@ -6031,7 +6066,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &In
break;
case SIInstrFlags::VOPC:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Tue Apr 30 15:08:23 2019
@@ -7,13 +7,13 @@
//===----------------------------------------------------------------------===//
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
@@ -96,7 +96,9 @@ class MTBUF_Pseudo <string opName, dag o
bits<1> has_vdata = 1;
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
+ bits<1> has_dlc = 1;
bits<1> glc_value = 0; // the value for glc if no such operand
+ bits<1> dlc_value = 0; // the value for dlc if no such operand
bits<1> has_srsrc = 1;
bits<1> has_soffset = 1;
bits<1> has_offset = 1;
@@ -119,6 +121,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
bits<12> offset;
bits<1> glc;
+ bits<1> dlc;
bits<7> format;
bits<8> vaddr;
bits<8> vdata;
@@ -137,17 +140,17 @@ class getMTBUFInsDA<list<RegisterClass>
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
);
dag InsData = !if(!empty(vaddrList),
(ins vdataClass:$vdata, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
- SLC:$slc, TFE:$tfe),
+ SLC:$slc, TFE:$tfe, DLC:$dlc),
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
- SLC:$slc, TFE:$tfe)
+ SLC:$slc, TFE:$tfe, DLC:$dlc)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
}
@@ -198,7 +201,7 @@ class MTBUF_Load_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs vdataClass:$vdata),
getMTBUFIns<addrKindCopy>.ret,
- " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -213,13 +216,13 @@ multiclass MTBUF_Pseudo_Loads<string opN
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set load_vt:$vdata,
(ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
- i1:$glc, i1:$slc, i1:$tfe)))]>,
+ i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set load_vt:$vdata,
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
- i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
MTBUFAddr64Table<1, NAME>;
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -244,7 +247,7 @@ class MTBUF_Store_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs),
getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
- " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -259,13 +262,13 @@ multiclass MTBUF_Pseudo_Stores<string op
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i8:$format, i1:$glc,
- i1:$slc, i1:$tfe))]>,
+ i1:$slc, i1:$tfe, i1:$dlc))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i8:$format, i1:$glc,
- i1:$slc, i1:$tfe))]>,
+ i1:$slc, i1:$tfe, i1:$dlc))]>,
MTBUFAddr64Table<1, NAME>;
def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -323,7 +326,9 @@ class MUBUF_Pseudo <string opName, dag o
bits<1> has_vdata = 1;
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
+ bits<1> has_dlc = 1;
bits<1> glc_value = 0; // the value for glc if no such operand
+ bits<1> dlc_value = 0; // the value for dlc if no such operand
bits<1> has_srsrc = 1;
bits<1> has_soffset = 1;
bits<1> has_offset = 1;
@@ -332,7 +337,7 @@ class MUBUF_Pseudo <string opName, dag o
bits<4> dwords = 0;
}
-class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
+class MUBUF_Real <MUBUF_Pseudo ps> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
let isPseudo = 0;
@@ -347,6 +352,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseu
bits<12> offset;
bits<1> glc;
+ bits<1> dlc;
bits<8> vaddr;
bits<8> vdata;
bits<7> srsrc;
@@ -357,7 +363,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseu
// For cache invalidation instructions.
-class MUBUF_Invalidate <string opName, SDPatternOperator node> :
+class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> {
let AsmMatchConverter = "";
@@ -372,7 +378,9 @@ class MUBUF_Invalidate <string opName, S
let has_vdata = 0;
let has_vaddr = 0;
let has_glc = 0;
+ let has_dlc = 0;
let glc_value = 0;
+ let dlc_value = 0;
let has_srsrc = 0;
let has_soffset = 0;
let has_offset = 0;
@@ -399,7 +407,7 @@ class getMUBUFInsDA<list<RegisterClass>
);
dag ret = !con(
!if(!empty(vdataList), InsNoData, InsData),
- !if(isLds, (ins), (ins TFE:$tfe))
+ !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
);
}
@@ -459,7 +467,7 @@ class MUBUF_Load_Pseudo <string opName,
!con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
!if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))),
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
- !if(isLds, " lds", "$tfe"),
+ !if(isLds, " lds", "$tfe") # "$dlc",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -489,7 +497,7 @@ multiclass MUBUF_Pseudo_Loads<string opN
!if(isLds,
[],
[(set load_vt:$vdata,
- (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+ (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
@@ -497,7 +505,7 @@ multiclass MUBUF_Pseudo_Loads<string opN
!if(isLds,
[],
[(set load_vt:$vdata,
- (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+ (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>;
@@ -530,7 +538,7 @@ class MUBUF_Store_Pseudo <string opName,
: MUBUF_Pseudo<opName,
(outs),
getMUBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -546,12 +554,12 @@ multiclass MUBUF_Pseudo_Stores<string op
def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
MUBUFAddr64Table<0, NAME>;
def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
MUBUFAddr64Table<1, NAME>;
def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -637,6 +645,7 @@ class MUBUF_Atomic_Pseudo<string opName,
let hasSideEffects = 1;
let DisableWQM = 1;
let has_glc = 0;
+ let has_dlc = 0;
let has_tfe = 0;
let maybeAtomic = 1;
}
@@ -655,6 +664,7 @@ class MUBUF_AtomicNoRet_Pseudo<string op
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let glc_value = 0;
+ let dlc_value = 0;
let AsmMatchConverter = "cvtMubufAtomic";
}
@@ -672,6 +682,7 @@ class MUBUF_AtomicRet_Pseudo<string opNa
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
let glc_value = 1;
+ let dlc_value = 0;
let Constraints = "$vdata = $vdata_in";
let DisableEncoding = "$vdata_in";
let AsmMatchConverter = "cvtMubufAtomicReturn";
@@ -1051,6 +1062,11 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidat
} // End let SubtargetPredicate = isGFX7Plus
+let SubtargetPredicate = isGFX10Plus in {
+ def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
+ def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
+} // End SubtargetPredicate = isGFX10Plus
+
//===----------------------------------------------------------------------===//
// MUBUF Patterns
//===----------------------------------------------------------------------===//
@@ -1063,6 +1079,10 @@ def extract_slc : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
}]>;
+def extract_dlc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
+}]>;
+
//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
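extract_dlc complements the existing extract_glc/extract_slc transforms: the buffer intrinsics carry their cache policy as one immediate, and the patterns below peel individual bits out of it when filling the glc/slc/dlc operands of the selected MUBUF/MTBUF instruction. The decomposition restated in plain C++ (slc in bit 1 and dlc in bit 2 follow from the transforms above; glc in bit 0 is the pre-existing convention and is assumed here):

  #include <cstdint>

  struct CachePolicyBits {
    bool GLC; // bit 0 (assumed, matching the existing extract_glc transform)
    bool SLC; // bit 1, per extract_slc above
    bool DLC; // bit 2, per the new extract_dlc transform
  };

  static CachePolicyBits decodeCachePolicy(uint64_t CachePolicy) {
    return { ((CachePolicy >> 0) & 1) != 0,
             ((CachePolicy >> 1) & 1) != 0,
             ((CachePolicy >> 2) & 1) != 0 };
  }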
@@ -1073,21 +1093,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatt
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1096,7 +1116,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatt
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
}
@@ -1144,21 +1164,23 @@ multiclass MUBUF_StoreIntrinsicPat<SDPat
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (as_i16imm $offset), (extract_glc $cachepolicy),
+ (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (as_i16imm $offset), (extract_glc $cachepolicy),
+ (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1167,8 +1189,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPat
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
+ (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
}
@@ -1322,8 +1344,8 @@ def : GCNPat<
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
>;
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
@@ -1331,12 +1353,12 @@ multiclass MUBUFLoad_Atomic_Pattern <MUB
def : GCNPat <
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
>;
def : GCNPat <
(vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
- (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+ (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
>;
}
@@ -1355,8 +1377,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseu
def : GCNPat <
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
- (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+ (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
>;
}
@@ -1377,12 +1399,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Ps
def : GCNPat <
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
def : GCNPat <
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
}
@@ -1392,12 +1414,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBU
ValueType vt, PatFrag ld_frag> {
def : GCNPat <
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
>;
def : GCNPat <
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
>;
}
@@ -1435,12 +1457,12 @@ multiclass MUBUFStore_Atomic_Pattern <MU
def : GCNPat <
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc), vt:$val),
- (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+ (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
>;
def : GCNPat <
(atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
- (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+ (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
>;
}
let SubtargetPredicate = isGFX6GFX7 in {
@@ -1454,8 +1476,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pse
def : GCNPat <
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe)),
- (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
+ (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
>;
}
@@ -1468,13 +1490,13 @@ multiclass MUBUFScratchStorePat <MUBUF_P
def : GCNPat <
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset)),
- (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
def : GCNPat <
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
u16imm:$offset)),
- (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
+ (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
}
@@ -1512,7 +1534,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatt
imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1520,7 +1542,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatt
imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1528,7 +1550,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatt
imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1538,7 +1560,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatt
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
}
@@ -1570,7 +1592,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPat
imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1578,7 +1600,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPat
imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1586,7 +1608,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPat
imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1596,7 +1618,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPat
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
}
@@ -1626,24 +1648,18 @@ let SubtargetPredicate = HasPackedD16VMe
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Base ENC_MUBUF for GFX6, GFX7.
+// Base ENC_MUBUF for GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
-class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real<op, ps>,
- Enc64,
- SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
- let AssemblerPredicate=isGFX6GFX7;
- let DecoderNamespace="GFX6GFX7";
-
+class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
+ MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
- let Inst{15} = ps.addr64;
let Inst{16} = !if(ps.lds, 1, 0);
let Inst{24-18} = op;
- let Inst{31-26} = 0x38; //encoding
+ let Inst{31-26} = 0x38;
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
@@ -1652,125 +1668,250 @@ class MUBUF_Real_si <bits<7> op, MUBUF_P
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
-multiclass MUBUF_Real_AllAddr_si<bits<7> op> {
- def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
- def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
- def _OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
- def _IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
- def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
-
-multiclass MUBUF_Real_AllAddr_Lds_si<bits<7> op> {
-
- def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
- MUBUFLdsTable<0, NAME # "_OFFSET_si">;
- def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
- MUBUFLdsTable<0, NAME # "_ADDR64_si">;
- def _OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
- MUBUFLdsTable<0, NAME # "_OFFEN_si">;
- def _IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
- MUBUFLdsTable<0, NAME # "_IDXEN_si">;
- def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
- MUBUFLdsTable<0, NAME # "_BOTHEN_si">;
-
- def _LDS_OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
- MUBUFLdsTable<1, NAME # "_OFFSET_si">;
- def _LDS_ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
- MUBUFLdsTable<1, NAME # "_ADDR64_si">;
- def _LDS_OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
- MUBUFLdsTable<1, NAME # "_OFFEN_si">;
- def _LDS_IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
- MUBUFLdsTable<1, NAME # "_IDXEN_si">;
- def _LDS_BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
- MUBUFLdsTable<1, NAME # "_BOTHEN_si">;
-}
-
-multiclass MUBUF_Real_Atomic_si<bits<7> op> : MUBUF_Real_AllAddr_si<op> {
- def _OFFSET_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
- def _ADDR64_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
- def _OFFEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
- def _IDXEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
- def _BOTHEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
-}
-
-defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_si <0x00>;
-defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_si <0x01>;
-defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x02>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x03>;
-defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_si <0x04>;
-defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_si <0x05>;
-defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x06>;
-defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x07>;
-defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_si <0x08>;
-defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_si <0x09>;
-defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_si <0x0a>;
-defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_si <0x0b>;
-defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_si <0x0c>;
-defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_si <0x0d>;
-defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_si <0x0e>;
-defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_si <0x0f>;
-defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_si <0x18>;
-defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_si <0x1a>;
-defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_si <0x1c>;
-defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_si <0x1d>;
-defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_si <0x1e>;
-defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_si <0x1f>;
-
-defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_si <0x30>;
-defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_si <0x31>;
-defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_si <0x32>;
-defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_si <0x33>;
-//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomic_si <0x34>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_si <0x35>;
-defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_si <0x36>;
-defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_si <0x37>;
-defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_si <0x38>;
-defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_si <0x39>;
-defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_si <0x3a>;
-defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_si <0x3b>;
-defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_si <0x3c>;
-defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_si <0x3d>;
-
-//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_si <0x3e>; // isn't on VI
-//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomic_si <0x3f>; // isn't on VI
-//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomic_si <0x40>; // isn't on VI
-defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_si <0x50>;
-defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_si <0x51>;
-defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_si <0x52>;
-defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_si <0x53>;
-//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomic_si <0x54>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_si <0x55>;
-defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_si <0x56>;
-defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_si <0x57>;
-defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_si <0x58>;
-defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_si <0x59>;
-defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_si <0x5a>;
-defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_si <0x5b>;
-defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_si <0x5c>;
-defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_si <0x5d>;
-// FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI.
-//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomic_si <0x5e">; // isn't on VI
-//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomic_si <0x5f>; // isn't on VI
-//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomic_si <0x60>; // isn't on VI
+class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
+ Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
+ let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+ let Inst{25} = op{7};
+}
-def BUFFER_WBINVL1_SC_si : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>;
-def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
+class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> :
+ Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
+ let Inst{15} = ps.addr64;
+}
-class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
- MTBUF_Real<ps>,
- Enc64,
- SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
- let AssemblerPredicate=isGFX6GFX7;
- let DecoderNamespace="GFX6GFX7";
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ multiclass MUBUF_Real_gfx10_with_name<bits<8> op, string opName,
+ string asmName> {
+ def _gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(opName)> {
+ MUBUF_Pseudo ps = !cast<MUBUF_Pseudo>(opName);
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+ multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> {
+ def _BOTHEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+ def _IDXEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _OFFEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _OFFSET_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ }
+ multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> {
+ def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+ MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">;
+ def _OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+ MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
+ def _IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+ MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
+ def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+ MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;
+
+ def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+ MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">;
+ def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+ MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
+ def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+ MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
+ def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+ MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
+ }
+ multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx10<op> {
+ def _BOTHEN_RTN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+ def _IDXEN_RTN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+ def _OFFEN_RTN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+ def _OFFSET_RTN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
+defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x01b>;
+defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x020>;
+defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x021>;
+defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x022>;
+defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x023>;
+defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx10<0x024>;
+defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x025>;
+// FIXME-GFX10: Add following instructions:
+//defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x026>;
+//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
+defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x080>;
+defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x081>;
+defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx10<0x082>;
+defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x083>;
+defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x084>;
+defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x085>;
+defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx10<0x086>;
+defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>;
+
+def BUFFER_GL0_INV_gfx10 :
+ MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>;
+def BUFFER_GL1_INV_gfx10 :
+ MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" in {
+ multiclass MUBUF_Real_gfx6<bits<8> op> {
+ def _gfx6 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
+ }
+} // End AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6"
+
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+ multiclass MUBUF_Real_gfx7<bits<8> op> {
+ def _gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
+ }
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ multiclass MUBUF_Real_AllAddr_gfx6_gfx7<bits<8> op> {
+ def _ADDR64_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
+ def _BOTHEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+ def _IDXEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _OFFEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _OFFSET_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ }
+ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> {
+ def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+ MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">;
+ def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
+ MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
+ def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+ MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
+ def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+ MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
+ def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+ MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;
+
+ def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+ MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">;
+ def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
+ MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
+ def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+ MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
+ def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+ MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
+ def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+ MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
+ }
+ multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx6_gfx7<op> {
+ def _ADDR64_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
+ def _BOTHEN_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+ def _IDXEN_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+ def _OFFEN_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+ def _OFFSET_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+ }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx6_gfx7<op>, MUBUF_Real_AllAddr_gfx10<op>;
+
+multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> :
+ MUBUF_Real_AllAddr_Lds_gfx6_gfx7<op>, MUBUF_Real_AllAddr_Lds_gfx10<op>;
+
+multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
+ MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
+
+// FIXME-GFX6: Following instructions are available only on GFX6.
+//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>;
+//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomics_gfx6 <0x054>;
+
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
+defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>;
+defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>;
+defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>;
+defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>;
+defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>;
+defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>;
+defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>;
+defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>;
+defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>;
+defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>;
+defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>;
+
+defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>;
+defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>;
+defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>;
+defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>;
+defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
+defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
+defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
+// FIXME-GFX6-GFX7-GFX10: Add following instructions:
+//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
+//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
+//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
+defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
+defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
+defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>;
+defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>;
+defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>;
+defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>;
+defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>;
+defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>;
+defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>;
+defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
+defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
+defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
+// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
+// FIXME-GFX6-GFX7-GFX10: Add following instructions:
+//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
+//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
+//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
+
+defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
+defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
+def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;
+
+//===----------------------------------------------------------------------===//
+// Base ENC_MTBUF for GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
+ MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
- let Inst{15} = ps.addr64;
let Inst{18-16} = op;
- let Inst{22-19} = dfmt;
- let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -1780,43 +1921,83 @@ class MTBUF_Real_si <bits<3> op, MTBUF_P
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
-multiclass MTBUF_Real_AllAddr_si<bits<3> op> {
- def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
- def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
- def _OFFEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
- def _IDXEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
- def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
+//===----------------------------------------------------------------------===//
+// MTBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> :
+ Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> {
+ let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+ let Inst{25-19} = format;
+ let Inst{53} = op{3};
+}
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> {
+ def _BOTHEN_gfx10 :
+ MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+ def _IDXEN_gfx10 :
+ MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _OFFEN_gfx10 :
+ MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _OFFSET_gfx10 :
+ MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
-defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>;
-defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>;
-defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
+defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x008>;
+defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x009>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00a>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00b>;
+defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x00c>;
+defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x00d>;
+defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00e>;
+defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>;
//===----------------------------------------------------------------------===//
-// CI
-// MTBUF - GFX6, GFX7.
+// MTBUF - GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
-class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real_si<op, ps> {
- let AssemblerPredicate = isGFX7Only;
- let DecoderNamespace = "GFX7";
+class MTBUF_Real_gfx6_gfx7<bits<4> op, MTBUF_Pseudo ps> :
+ Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.SI> {
+ let Inst{15} = ps.addr64;
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
}
-def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>;
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ multiclass MTBUF_Real_AllAddr_gfx6_gfx7<bits<4> op> {
+ def _ADDR64_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
+ def _BOTHEN_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+ def _IDXEN_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _OFFEN_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _OFFSET_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+ }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<4> op> :
+ MTBUF_Real_AllAddr_gfx6_gfx7<op>, MTBUF_Real_AllAddr_gfx10<op>;
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
//===----------------------------------------------------------------------===//
-// GFX8, GFX9 (VI).
+// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real<op, ps>,
+ MUBUF_Real<ps>,
Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
let AssemblerPredicate = isGFX8GFX9;
@@ -1866,7 +2047,7 @@ multiclass MUBUF_Real_AllAddr_Lds_vi<bit
}
class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real<op, ps>,
+ MUBUF_Real<ps>,
Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
let AssemblerPredicate=HasUnpackedD16VMem;
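The gfx10 MTBUF encoding defined above keeps the shared Base_MTBUF fields but
repurposes three of them: bit 15 carries dlc instead of addr64, the 4-bit
dfmt / 3-bit nfmt pair becomes a single 7-bit format field at bits 25-19, and
the opcode grows a fourth bit stored at bit 53. The stand-alone C++ sketch
below is an editorial illustration only (not part of this patch; the helper
names are invented) and packs exactly the fields those TableGen bit
assignments name.

#include <cstdint>
#include <cstdio>

// Insert Value into bits [Hi:Lo] of a 64-bit encoding word.
static uint64_t setField(uint64_t Inst, unsigned Hi, unsigned Lo,
                         uint64_t Value) {
  const uint64_t Mask = ((1ull << (Hi - Lo + 1)) - 1) << Lo;
  return (Inst & ~Mask) | ((Value << Lo) & Mask);
}

// Mirrors Base_MTBUF_Real_gfx6_gfx7_gfx10 plus the gfx10-only fields:
// offset{11-0}, offen{12}, idxen{13}, glc{14}, dlc{15}, op{18-16} and op{53},
// format{25-19}, the fixed ENC_MTBUF value 0x3a at {31-26}, vaddr{39-32},
// vdata{47-40}.
static uint64_t encodeMTBUFGfx10(unsigned Op4, unsigned Offset, bool Offen,
                                 bool Idxen, bool Glc, bool Dlc,
                                 unsigned Format, unsigned VAddr,
                                 unsigned VData) {
  uint64_t Inst = 0;
  Inst = setField(Inst, 11, 0, Offset);
  Inst = setField(Inst, 12, 12, Offen);
  Inst = setField(Inst, 13, 13, Idxen);
  Inst = setField(Inst, 14, 14, Glc);
  Inst = setField(Inst, 15, 15, Dlc);       // gfx6/gfx7 put addr64 here
  Inst = setField(Inst, 18, 16, Op4 & 0x7); // low three opcode bits
  Inst = setField(Inst, 25, 19, Format);    // gfx6/gfx7: dfmt{22-19}, nfmt{25-23}
  Inst = setField(Inst, 31, 26, 0x3a);      // ENC_MTBUF
  Inst = setField(Inst, 39, 32, VAddr);
  Inst = setField(Inst, 47, 40, VData);
  Inst = setField(Inst, 53, 53, Op4 >> 3);  // gfx10-only fourth opcode bit
  return Inst;
}

int main() {
  // TBUFFER_LOAD_FORMAT_D16_X is opcode 0x008 above, so op{3} lands in bit 53.
  printf("%016llx\n", (unsigned long long)encodeMTBUFGfx10(
                          0x008, 0, false, false, false, true, 1, 0, 0));
  return 0;
}
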
Modified: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td Tue Apr 30 15:08:23 2019
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
-def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
+def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
-def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>;
-def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>;
+def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
+def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -51,6 +51,8 @@ class FLAT_Pseudo<string opName, dag out
bits<1> has_data = 1;
bits<1> has_glc = 1;
bits<1> glcValue = 0;
+ bits<1> has_dlc = 1;
+ bits<1> dlcValue = 0;
let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
!if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -88,6 +90,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo
bits<1> slc;
bits<1> glc;
+ bits<1> dlc;
// Only valid on gfx9
bits<1> lds = 0; // XXX - What does this actually do?
@@ -141,9 +144,9 @@ class FLAT_Load_Pseudo <string opName, R
!con((ins VReg_64:$vaddr),
!if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
(ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
- (ins GLC:$glc, SLC:$slc)),
+ (ins GLC:$glc, SLC:$slc, DLC:$dlc)),
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
- " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+ " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = HasSaddr;
@@ -164,8 +167,8 @@ class FLAT_Store_Pseudo <string opName,
!con((ins VReg_64:$vaddr, vdataClass:$vdata),
!if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
(ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
- (ins GLC:$glc, SLC:$slc)),
- " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+ (ins GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@@ -198,9 +201,9 @@ class FLAT_Scratch_Load_Pseudo <string o
opName,
(outs regClass:$vdst),
!if(EnableSaddr,
- (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
- (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
- " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
+ (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+ (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = 1;
@@ -214,9 +217,9 @@ class FLAT_Scratch_Store_Pseudo <string
opName,
(outs),
!if(EnableSaddr,
- (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
- (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
- " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
+ (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@@ -248,6 +251,8 @@ class FLAT_AtomicNoRet_Pseudo<string opN
let mayStore = 1;
let has_glc = 0;
let glcValue = 0;
+ let has_dlc = 0;
+ let dlcValue = 0;
let has_vdst = 0;
let maybeAtomic = 1;
}
@@ -258,6 +263,7 @@ class FLAT_AtomicRet_Pseudo<string opNam
let hasPostISelHook = 1;
let has_vdst = 1;
let glcValue = 1;
+ let dlcValue = 0;
let PseudoInstr = NAME # "_RTN";
}
@@ -492,8 +498,8 @@ defm FLAT_ATOMIC_INC_X2 : FLAT_Atomi
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
VReg_64, i64, atomic_dec_flat>;
-// GFX7-only flat instructions.
-let SubtargetPredicate = isGFX7Only in {
+// GFX7-, GFX10-only flat instructions.
+let SubtargetPredicate = isGFX7GFX10 in {
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
VGPR_32, f32, null_frag, v2f32, VReg_64>;
@@ -513,7 +519,7 @@ defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atom
defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
VReg_64, f64>;
-} // End SubtargetPredicate = isGFX7Only
+} // End SubtargetPredicate = isGFX7GFX10
let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
@@ -656,6 +662,22 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_S
} // End SubtargetPredicate = HasFlatScratchInsts
+let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
+ defm GLOBAL_ATOMIC_FCMPSWAP :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
+ defm GLOBAL_ATOMIC_FMIN :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
+ defm GLOBAL_ATOMIC_FMAX :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
+ defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
+ defm GLOBAL_ATOMIC_FMIN_X2 :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
+ defm GLOBAL_ATOMIC_FMAX_X2 :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
+} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
+
+
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
@@ -663,51 +685,51 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_S
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, $slc)
+ (inst $vaddr, $offset, 0, 0, $slc)
>;
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
- (inst $vaddr, $offset, 0, $slc, $in)
+ (inst $vaddr, $offset, 0, 0, $slc, $in)
>;
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
- (inst $vaddr, $offset, 0, $slc, $in)
+ (inst $vaddr, $offset, 0, 0, $slc, $in)
>;
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, $slc)
+ (inst $vaddr, $offset, 0, 0, $slc)
>;
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, $slc)
+ (inst $vaddr, $offset, 0, 0, $slc)
>;
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
- (inst $vaddr, $data, $offset, 0, $slc)
+ (inst $vaddr, $data, $offset, 0, 0, $slc)
>;
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
- (inst $vaddr, $data, $offset, 0, $slc)
+ (inst $vaddr, $data, $offset, 0, 0, $slc)
>;
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
(node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
- (inst $vaddr, $data, $offset, 0, $slc)
+ (inst $vaddr, $data, $offset, 0, 0, $slc)
>;
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
(node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
- (inst $vaddr, $data, $offset, 0, $slc)
+ (inst $vaddr, $data, $offset, 0, 0, $slc)
>;
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
@@ -1108,3 +1130,193 @@ defm SCRATCH_STORE_DWORD : FLAT_R
defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
+
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
+ FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
+ let AssemblerPredicate = isGFX10Plus;
+ let DecoderNamespace = "GFX10";
+
+ let Inst{11-0} = {offset{12}, offset{10-0}};
+ let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
+ let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
+ let Inst{55} = 0;
+}
+
+
+multiclass FLAT_Real_Base_gfx10<bits<7> op> {
+ def _gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
+}
+
+multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
+ def _RTN_gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+}
+
+multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
+ def _SADDR_gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+}
+
+multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
+ def _SADDR_RTN_gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+}
+
+
+multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
+ FLAT_Real_Base_gfx10<op>,
+ FLAT_Real_SADDR_gfx10<op>;
+
+multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
+ FLAT_Real_Base_gfx10<op>,
+ FLAT_Real_RTN_gfx10<op>;
+
+multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
+ FLAT_Real_AllAddr_gfx10<op>,
+ FLAT_Real_RTN_gfx10<op>,
+ FLAT_Real_SADDR_RTN_gfx10<op>;
+
+
+// ENC_FLAT.
+defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>;
+defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>;
+defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>;
+defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>;
+defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>;
+defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>;
+defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>;
+defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>;
+defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>;
+defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>;
+defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>;
+defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
+defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>;
+defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>;
+defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>;
+defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>;
+defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>;
+defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>;
+defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>;
+defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>;
+defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>;
+defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>;
+defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>;
+defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>;
+defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>;
+defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>;
+defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>;
+defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>;
+defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>;
+defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>;
+defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>;
+defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>;
+defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>;
+defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>;
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>;
+defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>;
+defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>;
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>;
+
+
+// ENC_FLAT_GLBL.
+defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
+defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
+defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
+defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
+defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
+defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
+defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
+defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
+defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
+defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
+defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
+defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
+defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
+defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
+defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
+defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
+defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
+defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
+defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
+defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
+defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
+defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
+defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
+defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
+defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
+defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
+defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
+defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
+defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
+defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>;
+defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>;
+defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>;
+defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>;
+defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>;
+defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>;
+defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>;
+defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>;
+defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>;
+defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>;
+defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>;
+defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>;
+defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>;
+defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>;
+defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>;
+defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>;
+defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>;
+defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>;
+defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>;
+defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>;
+defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>;
+defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
+defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
+defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
+defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
+
+
+// ENC_FLAT_SCRATCH.
+defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
+defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
+defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
+defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
+defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
+defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
+defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
+defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
+defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
+defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
+defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
+defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
+defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
+defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
+defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
+defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
+defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
+defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
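One detail in FLAT_Real_gfx10 above is easy to miss: the 12-bit offset field
is assembled as {offset{12}, offset{10-0}}, so operand bit 12 lands in field
bit 11 and operand bit 11 is never encoded. In other words, only offsets that
fit in 12 signed bits round-trip, which is also why the printer change below
sign-extends 12 rather than 13 bits on GFX10. A minimal stand-alone sketch of
that packing, based on my reading of the TableGen bit spec (editorial only,
helper names invented):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Field bit 11 holds operand bit 12 (the sign of the 13-bit pseudo operand);
// operand bit 11 is dropped, so only 12-bit signed offsets are representable.
static uint16_t encodeFlatOffsetGfx10(int32_t Offset) {
  assert(Offset >= -2048 && Offset <= 2047 && "needs to fit in 12 signed bits");
  uint32_t U = static_cast<uint32_t>(Offset);
  return static_cast<uint16_t>((((U >> 12) & 1) << 11) | (U & 0x7ff));
}

// Inverse used when printing/disassembling: treat field bit 11 as the sign.
static int32_t decodeFlatOffsetGfx10(uint16_t Field) {
  int32_t Sign = (Field & 0x800) ? -2048 : 0;
  return Sign + (Field & 0x7ff);
}

int main() {
  for (int32_t O : {0, 1, 2047, -1, -2048})
    printf("%5d -> 0x%03x -> %5d\n", O, encodeFlatOffsetGfx10(O),
           decodeFlatOffsetGfx10(O));
  return 0;
}
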
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Tue Apr 30 15:08:23 2019
@@ -72,8 +72,14 @@ void AMDGPUInstPrinter::printU16ImmDecOp
}
void AMDGPUInstPrinter::printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
+ // GFX10: Address offset is 12-bit signed byte offset.
+ if (AMDGPU::isGFX10(STI)) {
+ O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
+ } else {
+ O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
+ }
}
void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
@@ -128,7 +134,7 @@ void AMDGPUInstPrinter::printOffsetS13(c
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << ((OpNo == 0)? "offset:" : " offset:");
- printS13ImmDecOperand(MI, OpNo, O);
+ printS13ImmDecOperand(MI, OpNo, STI, O);
}
}
@@ -173,6 +179,12 @@ void AMDGPUInstPrinter::printGDS(const M
printNamedBit(MI, OpNo, O, "gds");
}
+void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ if (AMDGPU::isGFX10(STI))
+ printNamedBit(MI, OpNo, O, "dlc");
+}
+
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "glc");
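The printS13ImmDecOperand change above is the decoder-side counterpart of the
offset packing: on GFX10 the operand is sign-extended from 12 bits, elsewhere
from 13. The snippet below re-implements the relevant behaviour of
llvm::SignExtend32<B> (llvm/Support/MathExtras.h) in isolation so the
difference is visible; it is an editorial sketch, not part of the patch.

#include <cstdint>
#include <cstdio>

template <unsigned B> static int32_t signExtend32(uint32_t X) {
  // Shift the B-bit value up to the top of the word, then arithmetic-shift it
  // back down so bit B-1 fills the upper bits.
  return static_cast<int32_t>(X << (32 - B)) >> (32 - B);
}

int main() {
  uint32_t Raw = 0xFFF; // all twelve low bits of the encoded field set
  printf("13-bit view: %d\n", signExtend32<13>(Raw)); // 4095: bit 12 is clear
  printf("12-bit view: %d\n", signExtend32<12>(Raw)); // -1: bit 11 is the sign
  return 0;
}
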
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Tue Apr 30 15:08:23 2019
@@ -41,7 +41,8 @@ private:
void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
@@ -67,6 +68,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
Modified: llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp Tue Apr 30 15:08:23 2019
@@ -197,6 +197,11 @@ static bool fixupGlobalSaddr(MachineBasi
// Atomics don't have a GLC, so omit the field if not there.
if (Glc)
NewGlob->addOperand(MF, *Glc);
+
+ MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
+ if (DLC)
+ NewGlob->addOperand(MF, *DLC);
+
NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
// _D16 have a vdst_in operand, copy it in.
MachineOperand *VDstInOp = TII->getNamedOperand(MI,
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Tue Apr 30 15:08:23 2019
@@ -70,6 +70,24 @@ void SIFrameLowering::emitFlatScratchIni
// Do a 64-bit pointer add.
if (ST.flatScratchIsPointer()) {
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
+ .addReg(FlatScrInitLo)
+ .addReg(ScratchWaveOffsetReg);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
+ .addReg(FlatScrInitHi)
+ .addImm(0);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
+ addReg(FlatScrInitLo).
+ addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
+ (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
+ addReg(FlatScrInitHi).
+ addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
+ (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+ return;
+ }
+
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
@@ -80,6 +98,8 @@ void SIFrameLowering::emitFlatScratchIni
return;
}
+ assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);
+
// Copy the size in bytes.
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitHi, RegState::Kill);
@@ -423,6 +443,7 @@ void SIFrameLowering::emitEntryFunctionS
.addReg(Rsrc01)
.addImm(EncodedOffset) // offset
.addImm(0) // glc
+ .addImm(0) // dlc
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
.addMemOperand(MMO);
return;
@@ -463,6 +484,7 @@ void SIFrameLowering::emitEntryFunctionS
.addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) // offset
.addImm(0) // glc
+ .addImm(0) // dlc
.addMemOperand(MMO)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Apr 30 15:08:23 2019
@@ -4166,6 +4166,10 @@ void SIInstrInfo::legalizeOperands(Machi
getNamedOperand(MI, AMDGPU::OpName::glc)) {
MIB.addImm(GLC->getImm());
}
+ if (const MachineOperand *DLC =
+ getNamedOperand(MI, AMDGPU::OpName::dlc)) {
+ MIB.addImm(DLC->getImm());
+ }
MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Tue Apr 30 15:08:23 2019
@@ -830,6 +830,7 @@ def omod : NamedOperandU32<"OModSI", Nam
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
+def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
Modified: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp Tue Apr 30 15:08:23 2019
@@ -131,6 +131,8 @@ class SILoadStoreOptimizer : public Mach
bool GLC1;
bool SLC0;
bool SLC1;
+ bool DLC0;
+ bool DLC1;
bool UseST64;
SmallVector<MachineInstr *, 8> InstsToMove;
};
@@ -323,7 +325,7 @@ bool SILoadStoreOptimizer::offsetsCanBeC
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
return (EltOffset0 + CI.Width0 == EltOffset1 ||
EltOffset1 + CI.Width1 == EltOffset0) &&
- CI.GLC0 == CI.GLC1 &&
+ CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
}
@@ -637,6 +639,8 @@ bool SILoadStoreOptimizer::findMatchingI
CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
}
+ CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
+ CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
}
// Check both offsets fit in the reduced range.
@@ -857,6 +861,7 @@ SILoadStoreOptimizer::mergeSBufferLoadIm
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.GLC0) // glc
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -909,6 +914,7 @@ SILoadStoreOptimizer::mergeBufferLoadPai
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -1088,9 +1094,10 @@ SILoadStoreOptimizer::mergeBufferStorePa
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset0, CI.Offset1)) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
- .addImm(0) // tfe
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(MIB, CI.InstsToMove);
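The SILoadStoreOptimizer hunks above extend the pairing logic: two adjacent
accesses may now only be merged when their dlc bits match, in addition to the
existing glc/slc checks, and the merged instruction is emitted with the dlc
value of the first access. Written out as a free-standing predicate
(editorial sketch only; the type and field names are simplified stand-ins for
CombineInfo):

#include <cstdio>

enum class InstClass { DSRead, DSWrite, SBufferLoadImm, BufferLoad, BufferStore };

struct CachePolicy {
  bool Glc = false;
  bool Slc = false;
  bool Dlc = false; // new on gfx10; must match as well before merging
};

// For non-DS accesses, offsetsCanBeCombined only allows a merge when the
// cache-policy bits agree; S_BUFFER_LOAD_IMM has no slc bit to compare.
static bool cachePoliciesAllowMerge(InstClass IC, const CachePolicy &A,
                                    const CachePolicy &B) {
  if (A.Glc != B.Glc || A.Dlc != B.Dlc)
    return false;
  return IC == InstClass::SBufferLoadImm || A.Slc == B.Slc;
}

int main() {
  CachePolicy A{true, false, true};
  CachePolicy B{true, false, false}; // dlc differs, so the pair is not merged
  printf("%d\n", cachePoliciesAllowMerge(InstClass::BufferLoad, A, B)); // 0
  return 0;
}
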
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Tue Apr 30 15:08:23 2019
@@ -536,6 +536,7 @@ static bool buildMUBUFOffsetLoadStore(co
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
+ .addImm(0) // dlc
.cloneMemRefs(*MI);
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
@@ -639,6 +640,7 @@ void SIRegisterInfo::buildSpillLoadStore
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
+ .addImm(0) // dlc
.addMemOperand(NewMMO);
if (NumSubRegs > 1)
@@ -769,6 +771,7 @@ bool SIRegisterInfo::spillSGPR(MachineBa
.addReg(MFI->getScratchRSrcReg()) // sbase
.addReg(OffsetReg, RegState::Kill) // soff
.addImm(0) // glc
+ .addImm(0) // dlc
.addMemOperand(MMO);
continue;
@@ -928,9 +931,10 @@ bool SIRegisterInfo::restoreSGPR(Machine
auto MIB =
BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
- .addReg(MFI->getScratchRSrcReg()) // sbase
- .addReg(OffsetReg, RegState::Kill) // soff
- .addImm(0) // glc
+ .addReg(MFI->getScratchRSrcReg()) // sbase
+ .addReg(OffsetReg, RegState::Kill) // soff
+ .addImm(0) // glc
+ .addImm(0) // dlc
.addMemOperand(MMO);
if (NumSubRegs > 1 && i == 0)
Modified: llvm/trunk/lib/Target/AMDGPU/SMInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SMInstructions.td?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SMInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SMInstructions.td Tue Apr 30 15:08:23 2019
@@ -40,6 +40,7 @@ class SM_Pseudo <string opName, dag outs
bits<1> has_sbase = 1;
bits<1> has_sdst = 1;
bit has_glc = 0;
+ bit has_dlc = 0;
bits<1> has_offset = 1;
bits<1> offset_is_imm = 0;
}
@@ -79,6 +80,7 @@ class SM_Load_Pseudo <string opName, dag
let mayLoad = 1;
let mayStore = 0;
let has_glc = 1;
+ let has_dlc = 1;
}
class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
@@ -88,6 +90,7 @@ class SM_Store_Pseudo <string opName, da
let mayLoad = 0;
let mayStore = 1;
let has_glc = 1;
+ let has_dlc = 1;
let ScalarStore = 1;
}
@@ -108,21 +111,23 @@ multiclass SM_Pseudo_Loads<string opName
RegisterClass dstClass> {
def _IMM : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc),
- " $sdst, $sbase, $offset$glc", []> {
+ (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
+ " $sdst, $sbase, $offset$glc$dlc", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let PseudoInstr = opName # "_IMM";
let has_glc = 1;
+ let has_dlc = 1;
}
def _SGPR : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
- " $sdst, $sbase, $offset$glc", []> {
+ (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
+ " $sdst, $sbase, $offset$glc$dlc", []> {
let BaseClass = baseClass;
let PseudoInstr = opName # "_SGPR";
let has_glc = 1;
+ let has_dlc = 1;
}
}
@@ -130,8 +135,8 @@ multiclass SM_Pseudo_Stores<string opNam
RegisterClass baseClass,
RegisterClass srcClass> {
def _IMM : SM_Store_Pseudo <opName,
- (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc),
- " $sdata, $sbase, $offset$glc", []> {
+ (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
+ " $sdata, $sbase, $offset$glc$dlc", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let SrcClass = srcClass;
@@ -139,8 +144,8 @@ multiclass SM_Pseudo_Stores<string opNam
}
def _SGPR : SM_Store_Pseudo <opName,
- (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
- " $sdata, $sbase, $offset$glc", []> {
+ (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
+ " $sdata, $sbase, $offset$glc$dlc", []> {
let BaseClass = baseClass;
let SrcClass = srcClass;
let PseudoInstr = opName # "_SGPR";
@@ -184,6 +189,16 @@ multiclass SM_Pseudo_Probe<string opName
def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
}
+class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
+ opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
+ " $sdst", [(set i32:$sdst, (node))]> {
+ let hasSideEffects = 1;
+ let mayStore = 0;
+ let mayLoad = 1;
+ let has_sbase = 0;
+ let has_offset = 0;
+}
+
//===----------------------------------------------------------------------===//
// Scalar Atomic Memory Classes
//===----------------------------------------------------------------------===//
@@ -197,6 +212,7 @@ class SM_Atomic_Pseudo <string opName,
let mayLoad = 1;
let mayStore = 1;
let has_glc = 1;
+ let has_dlc = 1;
// Should these be set?
let ScalarStore = 1;
@@ -212,9 +228,9 @@ class SM_Pseudo_Atomic<string opName,
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
!if(isImm,
- (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset),
- (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset)),
- !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", ""),
+ (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset, DLC:$dlc),
+ (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
+ !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
isRet> {
let offset_is_imm = isImm;
let PseudoInstr = opName # !if(isImm,
@@ -272,6 +288,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_
"s_buffer_load_dwordx16", SReg_128, SReg_512
>;
+let SubtargetPredicate = HasScalarStores in {
defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
@@ -287,7 +304,7 @@ defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_
defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
"s_buffer_store_dwordx4", SReg_128, SReg_128
>;
-
+} // End SubtargetPredicate = HasScalarStores
def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
@@ -297,15 +314,22 @@ def S_DCACHE_INV_VOL : SM_Inval_Pseudo <
} // let SubtargetPredicate = isGFX7GFX8GFX9
let SubtargetPredicate = isGFX8Plus in {
+let OtherPredicates = [HasScalarStores] in {
def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
+} // End OtherPredicates = [HasScalarStores]
def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
} // SubtargetPredicate = isGFX8Plus
-let SubtargetPredicate = HasFlatScratchInsts, Uses = [FLAT_SCR] in {
+let SubtargetPredicate = isGFX10Plus in {
+def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
+def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
+} // End SubtargetPredicate = isGFX10Plus
+
+let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
@@ -313,7 +337,7 @@ defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo
defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
-} // SubtargetPredicate = HasFlatScratchInsts
+} // SubtargetPredicate = HasScalarFlatScratchInsts
let SubtargetPredicate = HasScalarAtomics in {
@@ -375,7 +399,7 @@ defm S_ATOMIC_DEC_X2 : SM_P
} // let SubtargetPredicate = HasScalarAtomics
-let SubtargetPredicate = isGFX9Only in {
+let SubtargetPredicate = HasScalarAtomics in {
defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">;
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
}
@@ -411,13 +435,13 @@ multiclass SM_Real_Loads_si<bits<5> op,
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_si : SMRD_Real_si <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
}
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _SGPR_si : SMRD_Real_si <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
}
}
@@ -464,10 +488,10 @@ multiclass SM_Real_Loads_vi<bits<8> op,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_vi : SMEM_Real_vi <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
}
def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
}
}
@@ -485,11 +509,11 @@ multiclass SM_Real_Stores_vi<bits<8> op,
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
- let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
}
def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
}
}
@@ -638,7 +662,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op,
let AssemblerPredicates = [isGFX7Only];
let DecoderNamespace = "GFX7";
- let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc);
+ let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
let LGKM_CNT = ps.LGKM_CNT;
let SMRD = ps.SMRD;
@@ -718,26 +742,26 @@ multiclass SMRD_Pattern <string Instr, V
// 1. IMM offset
def : GCNPat <
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
>;
// 2. 32-bit IMM offset on CI
def : GCNPat <
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
- (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
+ (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
let OtherPredicates = [isGFX7Only];
}
// 3. SGPR offset
def : GCNPat <
(smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
>;
// 4. No offset
def : GCNPat <
(vt (smrd_load (i64 SReg_64:$sbase))),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
>;
}
@@ -745,20 +769,20 @@ multiclass SMLoad_Pattern <string Instr,
// 1. Offset as an immediate
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc)))
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc), 0))
>;
// 2. 32-bit IMM offset on CI
def : GCNPat <
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
- (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> {
+ (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), 0)> {
let OtherPredicates = [isGFX7Only];
}
// 3. Offset loaded in an 32bit SGPR
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc)))
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc), 0))
>;
}
@@ -801,3 +825,198 @@ def : GCNPat <
>;
} // let OtherPredicates = [isGFX8Plus]
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
+ SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
+ bit glc;
+ bit dlc;
+
+ let AssemblerPredicates = [isGFX10Plus];
+ let DecoderNamespace = "GFX10";
+
+ let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
+ let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
+ let Inst{14} = !if(ps.has_dlc, dlc, ?);
+ let Inst{16} = !if(ps.has_glc, glc, ?);
+ let Inst{25-18} = op;
+ let Inst{31-26} = 0x3d;
+ let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?);
+ let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding),
+ !if(ps.has_offset, offset{6-0}, ?));
+}
+
+multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
+ SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
+ SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
+ def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+ }
+ def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ }
+}
+
+class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
+ bits<7> sdata;
+
+ let sdst = ?;
+ let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
+}
+
+multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
+ SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
+ SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
+ // FIXME: The operand name $offset is inconsistent with $soff used
+ // in the pseudo
+ def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+ }
+
+ def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ }
+}
+
+defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
+
+let SubtargetPredicate = HasScalarFlatScratchInsts in {
+defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
+defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
+defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
+} // End SubtargetPredicate = HasScalarFlatScratchInsts
+
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
+
+let SubtargetPredicate = HasScalarStores in {
+defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
+defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
+defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
+let OtherPredicates = [HasScalarFlatScratchInsts] in {
+defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
+defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
+defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
+} // End OtherPredicates = [HasScalarFlatScratchInsts]
+defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
+defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
+defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
+} // End SubtargetPredicate = HasScalarStores
+
+def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
+def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>;
+def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
+def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
+def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
+
+let SubtargetPredicate = HasScalarStores in {
+def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
+} // End SubtargetPredicate = HasScalarStores
+
+multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
+ def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
+ def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+}
+
+defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
+defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
+
+class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
+ : SMEM_Real_gfx10 <op, ps> {
+
+ bits<7> sdata;
+ bit dlc;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ let glc = ps.glc;
+
+ let Inst{14} = !if(ps.has_dlc, dlc, 0);
+ let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
+}
+
+multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
+ def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
+ def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+ def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
+ def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+}
+
+let SubtargetPredicate = HasScalarAtomics in {
+
+defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
+defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
+defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
+defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
+defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
+defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
+defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
+defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
+defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
+defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
+defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
+defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
+defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
+
+defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
+defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
+defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
+defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
+defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
+defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
+defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
+defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
+defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
+defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
+defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
+defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
+defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
+
+defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
+defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
+defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
+defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
+defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
+defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
+defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
+defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
+defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
+defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
+defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
+defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
+defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
+
+defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
+defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
+defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
+defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
+defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
+defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
+defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
+defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
+defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
+defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
+defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
+defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
+defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
+
+multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
+ def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
+ def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+}
+
+defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
+defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
+
+} // End SubtargetPredicate = HasScalarAtomics
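
For readers following the new SMEM_Real_gfx10 encoding above: it packs sbase, sdst, glc, dlc, the opcode, and either a 20-bit immediate offset (with SGPR_NULL in the soffset field) or an SGPR offset into a single 64-bit word. The sketch below is not part of the patch; it is a rough C++ illustration of that bit layout, and SGPR_NULL_ENC is an assumed placeholder value, not taken from this change.

// Rough illustration (not part of the patch) of the GFX10 SMEM bit layout
// defined by the Inst{} assignments in SMEM_Real_gfx10 above.
#include <cstdint>
#include <cstdio>

constexpr uint64_t SGPR_NULL_ENC = 0x7d; // assumption, for illustration only

uint64_t encodeSMEMGfx10(uint64_t Op, uint64_t SBase, uint64_t SDst,
                         bool GLC, bool DLC, bool OffsetIsImm,
                         uint64_t Offset) {
  uint64_t Inst = 0;
  Inst |= (SBase >> 1) & 0x3f;          // Inst{5-0}   = sbase{6-1} (SGPR pair / 2)
  Inst |= (SDst & 0x7f) << 6;           // Inst{12-6}  = sdst
  Inst |= uint64_t(DLC) << 14;          // Inst{14}    = dlc
  Inst |= uint64_t(GLC) << 16;          // Inst{16}    = glc
  Inst |= (Op & 0xff) << 18;            // Inst{25-18} = op
  Inst |= uint64_t(0x3d) << 26;         // Inst{31-26} = SMEM major opcode
  if (OffsetIsImm) {
    Inst |= (Offset & 0xfffff) << 32;   // Inst{51-32} = 20-bit immediate offset
    Inst |= SGPR_NULL_ENC << 57;        // Inst{63-57} = SGPR_NULL in soffset
  } else {
    Inst |= (Offset & 0x7f) << 57;      // Inst{63-57} = SGPR offset register
  }
  return Inst;
}

int main() {
  // e.g. s_load_dword s0, s[10:11], 0x0 (op 0x000, sbase = s[10:11], no glc/dlc)
  printf("0x%016llx\n",
         (unsigned long long)encodeSMEMGfx10(0x000, 10, 0, false, false, true, 0));
}
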
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir Tue Apr 30 15:08:23 2019
@@ -18,7 +18,7 @@ body: |
; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:sgpr(p1) = COPY $sgpr2_sgpr3
%1:vgpr(p1) = COPY %0
%2:vgpr(s32) = G_IMPLICIT_DEF
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir Tue Apr 30 15:08:23 2019
@@ -13,7 +13,7 @@ body: |
; GCN-LABEL: name: implicit_def_s32
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = COPY $vgpr3_vgpr4
%1:vgpr(s32) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -30,7 +30,7 @@ body: |
; GCN-LABEL: name: implicit_def_s64
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = COPY $vgpr3_vgpr4
%1:vgpr(s64) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -60,7 +60,7 @@ body: |
; GCN-LABEL: name: implicit_def_p1
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -76,7 +76,7 @@ body: |
; GCN-LABEL: name: implicit_def_p3
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p3) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -92,7 +92,7 @@ body: |
; GCN-LABEL: name: implicit_def_p4
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p4) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
Modified: llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir Tue Apr 30 15:08:23 2019
@@ -8,9 +8,9 @@ name: trivial_smem_clause_load_smrd4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
@@ -20,11 +20,11 @@ name: trivial_smem_clause_load_smrd4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -34,13 +34,13 @@ name: trivial_smem_clause_load_smrd4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
- ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
- ; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
- $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+ ; GCN-NEXT: S_ENDPGM 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
S_ENDPGM 0
...
---
@@ -50,15 +50,15 @@ name: trivial_smem_clause_load_smrd4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
- ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
- ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
- ; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
- $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
- $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+ ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
+ ; GCN-NEXT: S_ENDPGM 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+ $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
S_ENDPGM 0
...
---
@@ -67,11 +67,11 @@ name: trivial_smem_clause_load_smrd4_x2_
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
- ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
@@ -81,9 +81,9 @@ name: smrd_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
- ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
@@ -93,9 +93,9 @@ name: smrd_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
- ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
@@ -105,9 +105,9 @@ name: smrd_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: smrd_load8_overwrite_ptr
- ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
@@ -119,46 +119,46 @@ name: break_smem_clause_at_max_smem_clau
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
- ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
+ ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
; GCN-NEXT: S_ENDPGM 0
- $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-
- $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-
- $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
-
- $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+
+ $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+
+ $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+
+ $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
$sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
S_ENDPGM 0
...
@@ -169,12 +169,12 @@ name: break_smem_clause_simple_load_smrd
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
- ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -184,11 +184,11 @@ name: break_smem_clause_simple_load_smrd
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -198,12 +198,12 @@ name: break_smem_clause_simple_load_smrd
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
- ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
- $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -213,11 +213,11 @@ name: break_smem_clause_simple_load_smrd
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
- ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
+ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
- $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
S_ENDPGM 0
...
---
@@ -228,16 +228,16 @@ body: |
; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
- ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
bb.0:
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
bb.1:
- $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -248,11 +248,11 @@ name: break_smem_clause_store_load_into_
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
- ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
+ ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
- $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
+ S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
+ $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
S_ENDPGM 0
...
---
@@ -264,11 +264,11 @@ name: break_smem_clause_store_load_into_
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
- ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
- $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
+ $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -278,13 +278,13 @@ name: valu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_smem_clause
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -294,13 +294,13 @@ name: salu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_smem_clause
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr8 = S_MOV_B32 0
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -309,13 +309,13 @@ name: ds_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_smem_clause
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -325,13 +325,13 @@ name: flat_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: flat_inst_breaks_smem_clause
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
- ; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: S_ENDPGM 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
@@ -341,11 +341,11 @@ name: implicit_use_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_smem_clause
- ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
+ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
+ ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
- $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
+ $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir Tue Apr 30 15:08:23 2019
@@ -7,10 +7,10 @@ name: trivial_clause_load_flat4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x1
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -20,12 +20,12 @@ name: trivial_clause_load_flat4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -35,14 +35,14 @@ name: trivial_clause_load_flat4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x3
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -52,16 +52,16 @@ name: trivial_clause_load_flat4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x4
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -71,12 +71,12 @@ name: trivial_clause_load_flat4_x2_samep
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -86,10 +86,10 @@ name: flat_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -99,10 +99,10 @@ name: flat_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
- ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -112,10 +112,10 @@ name: flat_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: flat_load8_overwrite_ptr
- ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -128,48 +128,48 @@ name: break_clause_at_max_clause_size_fl
body: |
bb.0:
; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
- ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
; GCN-NEXT: S_ENDPGM 0
- $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-
- $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-
- $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
-
- $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+ $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+ $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+ $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
S_ENDPGM 0
...
@@ -180,13 +180,13 @@ name: break_clause_simple_load_flat4_lo_
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -196,13 +196,13 @@ name: break_clause_simple_load_flat4_hi_
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -212,13 +212,13 @@ name: break_clause_simple_load_flat8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
- ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -229,12 +229,12 @@ name: break_clause_simple_load_flat16_pt
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
- ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -249,17 +249,17 @@ body: |
; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
- ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
bb.0:
- $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
bb.1:
- $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -270,12 +270,12 @@ name: break_clause_store_load_into_ptr_f
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -287,12 +287,12 @@ name: break_clause_store_load_into_data_
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_data_flat4
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -303,15 +303,15 @@ name: valu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_clause
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -322,15 +322,15 @@ name: salu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_clause
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$sgpr8 = S_MOV_B32 0
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -340,15 +340,15 @@ name: ds_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_clause
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -358,14 +358,14 @@ name: smrd_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: smrd_inst_breaks_clause
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -375,13 +375,13 @@ name: implicit_use_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_clause
- ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
+ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
- $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
+ $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -390,12 +390,12 @@ name: trivial_clause_load_mubuf4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
- ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
- $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -404,13 +404,13 @@ name: break_clause_simple_load_mubuf_off
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
- ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
- $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -421,11 +421,11 @@ name: mubuf_load4_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: mubuf_load4_overwrite_ptr
- ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
S_ENDPGM 0
@@ -438,13 +438,13 @@ name: break_clause_flat_load_mubuf_load
body: |
bb.0:
; GCN-LABEL: name: break_clause_flat_load_mubuf_load
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# Break a clause from interference between mubuf and flat instructions
@@ -459,8 +459,8 @@ name: break_clause_mubuf_load_flat_load
body: |
bb.0:
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
- $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -471,12 +471,12 @@ name: break_clause_atomic_rtn_into_ptr_f
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
- ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -487,11 +487,11 @@ body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -501,12 +501,12 @@ name: break_clause_atomic_rtn_into_ptr_m
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
- ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -518,11 +518,11 @@ body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -533,11 +533,11 @@ name: no_break_clause_mubuf_load_novaddr
body: |
bb.0:
; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
- ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
- $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -547,16 +547,16 @@ name: mix_load_store_clause
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause
- ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -566,15 +566,15 @@ name: mix_load_store_clause_same_address
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause_same_address
- ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir Tue Apr 30 15:08:23 2019
@@ -43,8 +43,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -55,10 +55,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -105,8 +105,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -117,10 +117,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -168,8 +168,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -180,10 +180,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -233,8 +233,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -245,10 +245,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -310,8 +310,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -322,10 +322,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -375,8 +375,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -387,10 +387,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir Tue Apr 30 15:08:23 2019
@@ -17,15 +17,15 @@ body: |
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr4_vgpr5 = IMPLICIT_DEF
- $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
- $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr2 = IMPLICIT_DEF
$vgpr3 = IMPLICIT_DEF
$vgpr6 = IMPLICIT_DEF
$vgpr0 = V_ADD_I32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec
- FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
- FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir Tue Apr 30 15:08:23 2019
@@ -14,7 +14,7 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
- %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir Tue Apr 30 15:08:23 2019
@@ -30,7 +30,7 @@ body: |
%14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
%15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
%16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
- BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
+ BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
S_ENDPGM 0
bb.2:
@@ -78,7 +78,7 @@ body: |
bb.8:
successors: %bb.10
- %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
+ %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
%35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
%28:vreg_1 = COPY %35
Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir Tue Apr 30 15:08:23 2019
@@ -83,7 +83,7 @@ body: |
bb.9:
successors: %bb.10(0x80000000)
- %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+ %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec
%22:sreg_64 = COPY $exec, implicit-def $exec
%23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc
Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir Tue Apr 30 15:08:23 2019
@@ -68,7 +68,7 @@ body: |
%23:vreg_128 = COPY killed %17
%24:sreg_64 = COPY killed %16
%25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec
- %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+ %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec
%29:vreg_128 = COPY killed %21
%29.sub0:vreg_128 = COPY %1
Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir Tue Apr 30 15:08:23 2019
@@ -46,10 +46,10 @@ body: |
%0 = COPY $sgpr2_sgpr3
%1 = COPY $vgpr2
%2 = COPY $vgpr3
- %3 = S_LOAD_DWORDX8_IMM %0, 0, 0
- %4 = S_LOAD_DWORDX4_IMM %0, 12, 0
- %5 = S_LOAD_DWORDX8_IMM %0, 16, 0
- %6 = S_LOAD_DWORDX4_IMM %0, 28, 0
+ %3 = S_LOAD_DWORDX8_IMM %0, 0, 0, 0
+ %4 = S_LOAD_DWORDX4_IMM %0, 12, 0, 0
+ %5 = S_LOAD_DWORDX8_IMM %0, 16, 0, 0
+ %6 = S_LOAD_DWORDX4_IMM %0, 28, 0, 0
undef %7.sub0 = S_MOV_B32 212739
%20 = COPY %7
%11 = COPY %20
Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir Tue Apr 30 15:08:23 2019
@@ -11,7 +11,7 @@
#
# GCN-LABEL: bb.6:
# GCN: successors: %bb.7(0x{{[0-9]+}}), %bb.18(0x{{[0-9]+}})
-# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, implicit $exec
+# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, implicit $exec
#
--- |
@@ -69,7 +69,7 @@ body: |
%10:sreg_64 = COPY killed %5
undef %11.sub2:sreg_128 = COPY %4
%11.sub3:sreg_128 = COPY %3
- %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, implicit $exec
+ %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, implicit $exec
undef %13.sub1:vreg_128 = COPY %9.sub1
%13.sub2:vreg_128 = COPY %9.sub2
%14:sreg_64 = V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $exec
@@ -161,7 +161,7 @@ body: |
bb.18:
successors: %bb.7(0x80000000)
dead %59:vgpr_32 = V_FMA_F32 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $exec
- dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, implicit $exec
+ dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, 0, implicit $exec
undef %66.sub1:vreg_128 = COPY %13.sub1
%66.sub2:vreg_128 = COPY %13.sub2
%67:sreg_64 = V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir Tue Apr 30 15:08:23 2019
@@ -145,10 +145,10 @@ body: |
%40:vgpr_32 = V_MAD_F32 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $exec
%41:vgpr_32 = V_MAD_F32 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $exec
%42:vgpr_32 = V_CVT_I32_F32_e32 killed %41, implicit $exec
- %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0 :: (dereferenceable invariant load 4)
+ %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0, 0 :: (dereferenceable invariant load 4)
%45:vgpr_32 = V_MUL_LO_I32 killed %42, killed %43, implicit $exec
%46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec
- %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
+ %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec
%50:sreg_64 = COPY $exec, implicit-def $exec
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc
Modified: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir Tue Apr 30 15:08:23 2019
@@ -25,7 +25,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -33,7 +33,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
@@ -44,7 +44,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: DBG_VALUE
@@ -72,7 +72,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -80,7 +80,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -92,7 +92,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -133,7 +133,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -141,7 +141,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
@@ -152,7 +152,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: bb.4:
@@ -180,7 +180,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -188,7 +188,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -200,7 +200,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -241,7 +241,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -249,7 +249,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
@@ -260,7 +260,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: bb.4:
@@ -289,7 +289,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -297,7 +297,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -309,7 +309,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -350,7 +350,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -358,7 +358,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -370,7 +370,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
@@ -400,7 +400,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -408,7 +408,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -420,7 +420,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
%15:sgpr_32 = IMPLICIT_DEF
@@ -463,7 +463,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -471,7 +471,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
@@ -482,7 +482,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
@@ -512,7 +512,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -520,7 +520,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -532,7 +532,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -575,7 +575,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -583,7 +583,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -595,7 +595,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@@ -623,7 +623,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -631,7 +631,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -643,7 +643,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -683,7 +683,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -691,7 +691,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -703,7 +703,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@@ -731,7 +731,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -739,7 +739,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -751,7 +751,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc
@@ -791,7 +791,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -799,7 +799,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -811,7 +811,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.5(0x80000000)
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@@ -842,7 +842,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -850,7 +850,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -862,7 +862,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc
Modified: llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir Tue Apr 30 15:08:23 2019
@@ -44,7 +44,7 @@ body: |
liveins: $sgpr0_sgpr1
%0 = COPY $sgpr0_sgpr1
- %1 = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %1 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%2 = COPY %1.sub1
%3 = COPY %1.sub0
%4 = S_MOV_B32 61440
@@ -54,7 +54,7 @@ body: |
%8 = S_MOV_B32 9999
%9 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc
%10 = COPY %9
- BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -130,7 +130,7 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%31 = V_ASHRREV_I32_e64 31, %3, implicit $exec
%32 = REG_SEQUENCE %3, 1, %31, 2
%33 = V_LSHLREV_B64 2, killed %32, implicit $exec
@@ -144,19 +144,19 @@ body: |
%34 = V_MOV_B32_e32 63, implicit $exec
%27 = V_AND_B32_e64 %26, %24, implicit $exec
- FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %37, %27, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%28 = V_AND_B32_e64 %24, %26, implicit $exec
- FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %37, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%29 = V_AND_B32_e32 %26, %24, implicit $exec
- FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %37, %29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%30 = V_AND_B32_e64 %26, %26, implicit $exec
- FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %37, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%31 = V_AND_B32_e64 %34, %34, implicit $exec
- FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %37, %31, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
@@ -210,7 +210,7 @@ body: |
liveins: $sgpr0_sgpr1
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%5 = S_MOV_B32 1
%6 = COPY %4.sub1
%7 = COPY %4.sub0
@@ -219,7 +219,7 @@ body: |
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
%12 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc
%13 = COPY %12
- BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -316,7 +316,7 @@ body: |
%2 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16 = REG_SEQUENCE %2, 1, %15, 2
%17 = V_LSHLREV_B64 2, killed %16, implicit $exec
@@ -332,34 +332,34 @@ body: |
%27 = S_MOV_B32 -4
%11 = V_LSHLREV_B32_e64 12, %10, implicit $exec
- FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%12 = V_LSHLREV_B32_e64 %7, 12, implicit $exec
- FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%13 = V_LSHL_B32_e64 %7, 12, implicit $exec
- FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%14 = V_LSHL_B32_e64 12, %7, implicit $exec
- FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%15 = V_LSHL_B32_e64 12, %24, implicit $exec
- FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%22 = V_LSHL_B32_e64 %6, 12, implicit $exec
- FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%23 = V_LSHL_B32_e64 %6, 32, implicit $exec
- FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%25 = V_LSHL_B32_e32 %6, %6, implicit $exec
- FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%26 = V_LSHLREV_B32_e32 11, %24, implicit $exec
- FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%28 = V_LSHL_B32_e32 %27, %6, implicit $exec
- FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
@@ -410,7 +410,7 @@ body: |
liveins: $sgpr0_sgpr1
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%5 = S_MOV_B32 999123
%6 = COPY %4.sub1
%7 = COPY %4.sub0
@@ -419,7 +419,7 @@ body: |
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
%12 = S_ASHR_I32 killed %5, 12, implicit-def dead $scc
%13 = COPY %12
- BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -519,7 +519,7 @@ body: |
%2 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %3 = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16 = REG_SEQUENCE %2, 1, %15, 2
%17 = V_LSHLREV_B64 2, killed %16, implicit $exec
@@ -540,34 +540,34 @@ body: |
%35 = V_MOV_B32_e32 2, implicit $exec
%11 = V_ASHRREV_I32_e64 8, %10, implicit $exec
- FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%12 = V_ASHRREV_I32_e64 %8, %10, implicit $exec
- FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%13 = V_ASHR_I32_e64 %7, 3, implicit $exec
- FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%14 = V_ASHR_I32_e64 7, %32, implicit $exec
- FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%15 = V_ASHR_I32_e64 %27, %24, implicit $exec
- FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%22 = V_ASHR_I32_e64 %6, 4, implicit $exec
- FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%23 = V_ASHR_I32_e64 %6, %33, implicit $exec
- FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%25 = V_ASHR_I32_e32 %34, %34, implicit $exec
- FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%26 = V_ASHRREV_I32_e32 11, %10, implicit $exec
- FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%28 = V_ASHR_I32_e32 %27, %35, implicit $exec
- FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
@@ -618,7 +618,7 @@ body: |
liveins: $sgpr0_sgpr1
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%5 = S_MOV_B32 -999123
%6 = COPY %4.sub1
%7 = COPY %4.sub0
@@ -627,7 +627,7 @@ body: |
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
%12 = S_LSHR_B32 killed %5, 12, implicit-def dead $scc
%13 = COPY %12
- BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -728,7 +728,7 @@ body: |
%2 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %3 = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16 = REG_SEQUENCE %2, 1, %15, 2
%17 = V_LSHLREV_B64 2, killed %16, implicit $exec
@@ -749,34 +749,34 @@ body: |
%35 = V_MOV_B32_e32 2, implicit $exec
%11 = V_LSHRREV_B32_e64 8, %10, implicit $exec
- FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%12 = V_LSHRREV_B32_e64 %8, %10, implicit $exec
- FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%13 = V_LSHR_B32_e64 %7, 3, implicit $exec
- FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%14 = V_LSHR_B32_e64 7, %32, implicit $exec
- FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%15 = V_LSHR_B32_e64 %27, %24, implicit $exec
- FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%22 = V_LSHR_B32_e64 %6, 4, implicit $exec
- FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%23 = V_LSHR_B32_e64 %6, %33, implicit $exec
- FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%25 = V_LSHR_B32_e32 %34, %34, implicit $exec
- FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%26 = V_LSHRREV_B32_e32 11, %10, implicit $exec
- FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%28 = V_LSHR_B32_e32 %27, %35, implicit $exec
- FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
@@ -800,7 +800,7 @@ body: |
bb.0:
%0 = V_MOV_B32_e32 0, implicit $exec
%2 = V_XOR_B32_e64 killed %0, undef %1, implicit $exec
- FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir Tue Apr 30 15:08:23 2019
@@ -291,7 +291,7 @@ body: |
bb.3..lr.ph3410.preheader:
successors: %bb.4(0x80000000)
- dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+ dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
%36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir Tue Apr 30 15:08:23 2019
@@ -11,7 +11,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
- %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0
+ %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0
S_NOP 0, implicit-def %4:sreg_128, implicit %10.sub1:sreg_128
S_CBRANCH_SCC0 %bb.3, implicit undef $scc
S_BRANCH %bb.1
@@ -26,7 +26,7 @@ body: |
S_BRANCH %bb.4
bb.3:
- %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0
+ %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0
%7:sreg_32_xm0 = COPY %10.sub1:sreg_128
%8:sreg_32_xm0 = COPY %10.sub2:sreg_128
Modified: llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir Tue Apr 30 15:08:23 2019
@@ -12,7 +12,7 @@ body: |
%1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
%3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1
- FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir Tue Apr 30 15:08:23 2019
@@ -50,6 +50,6 @@ body: |
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %2, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %2, 0, 0, 0, 0, implicit $exec
dead %2:vgpr_32 = V_MAC_F32_e32 %0:vgpr_32, %1:vgpr_32, %2:vgpr_32, implicit $exec
S_ENDPGM 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir Tue Apr 30 15:08:23 2019
@@ -23,5 +23,5 @@ body: |
$vgpr10 = COPY killed $sgpr14, implicit $exec
$vgpr11 = COPY killed $sgpr15, implicit $exec
- FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir Tue Apr 30 15:08:23 2019
@@ -71,7 +71,7 @@ body: |
; CHECK: dead %26:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $exec
; CHECK: dead %27:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $exec
; CHECK: dead %28:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF6]], 0, [[DEF3]], 0, 0, implicit $exec
- ; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, implicit $exec
+ ; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -129,7 +129,7 @@ body: |
%26:vgpr_32 = V_MAD_F32 0, %25, 0, %4, 0, %1, 0, 0, implicit $exec
%27:vgpr_32 = V_MAD_F32 0, %25, 0, %5, 0, %2, 0, 0, implicit $exec
%28:vgpr_32 = V_MAD_F32 0, %25, 0, %6, 0, %3, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir Tue Apr 30 15:08:23 2019
@@ -17,7 +17,7 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
- %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
@@ -25,7 +25,7 @@ body: |
---
# GCN-LABEL: name: load_without_memoperand
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: S_ENDPGM 0
name: load_without_memoperand
tracksRegLiveness: true
@@ -41,7 +41,7 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
- %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
@@ -49,7 +49,7 @@ body: |
---
# GCN-LABEL: name: load_volatile
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
+# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
# GCN-NEXT: S_ENDPGM 0
name: load_volatile
tracksRegLiveness: true
@@ -65,7 +65,7 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
- %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
@@ -73,7 +73,7 @@ body: |
---
# GCN-LABEL: name: store
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
# GCN-NEXT: S_ENDPGM 0
name: store
tracksRegLiveness: true
@@ -86,7 +86,7 @@ body: |
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
- FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
---
@@ -297,12 +297,12 @@ body: |
S_ENDPGM 0
...
-# GCN-LABEL: name: implicit_use_on_s_endpgm
+# GCN-LABEL: name: implicit_use_on_S_ENDPGM 0
# GCN: V_ADD_I32
# GCN: COPY
# GCN: V_ADDC_U32
# GCN: S_ENDPGM 0, implicit %3
-name: implicit_use_on_s_endpgm
+name: implicit_use_on_S_ENDPGM 0
tracksRegLiveness: true
body: |
Modified: llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir Tue Apr 30 15:08:23 2019
@@ -54,24 +54,24 @@ body: |
%1 = COPY $sgpr4_sgpr5
%0 = COPY $vgpr0
- %3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %3 = S_LOAD_DWORDX2_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %1, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%7 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%2 = V_MOV_B32_e32 0, implicit $exec
undef %12.sub0 = V_ADD_I32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec
%11 = COPY %4.sub1
%12.sub1 = V_ADDC_U32_e32 %11, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
- %5 = FLAT_LOAD_DWORD %12, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1)
+ %5 = FLAT_LOAD_DWORD %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1)
undef %9.sub0 = V_ADD_I32_e32 %3.sub0, %7, implicit-def $vcc, implicit $exec
%8 = COPY %3.sub1
%9.sub1 = V_ADDC_U32_e32 %8, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
undef %13.sub0 = V_ADD_I32_e32 16, %12.sub0, implicit-def $vcc, implicit $exec
%13.sub1 = V_ADDC_U32_e32 %12.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
- %6 = FLAT_LOAD_DWORD %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34)
+ %6 = FLAT_LOAD_DWORD %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34)
undef %10.sub0 = V_ADD_I32_e32 16, %9.sub0, implicit-def $vcc, implicit $exec
%10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
- FLAT_STORE_DWORD %9, %5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2)
- FLAT_STORE_DWORD %10, %6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4)
+ FLAT_STORE_DWORD %9, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2)
+ FLAT_STORE_DWORD %10, %6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4)
S_ENDPGM 0
...
Added: llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll?rev=359621&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll Tue Apr 30 15:08:23 2019
@@ -0,0 +1,85 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+
+; GCN-LABEL: flat_inst_offset:
+; GFX9: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:4
+; GFX9: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}} offset:4
+; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+; GFX10: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}{{$}}
+define void @flat_inst_offset(i32* nocapture %p) {
+ %gep = getelementptr inbounds i32, i32* %p, i64 1
+ %load = load i32, i32* %gep, align 4
+ %inc = add nsw i32 %load, 1
+ store i32 %inc, i32* %gep, align 4
+ ret void
+}
+
+; GCN-LABEL: global_inst_offset:
+; GCN: global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off offset:4
+; GCN: global_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:4
+define void @global_inst_offset(i32 addrspace(1)* nocapture %p) {
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 1
+ %load = load i32, i32 addrspace(1)* %gep, align 4
+ %inc = add nsw i32 %load, 1
+ store i32 %inc, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
+; GCN-LABEL: load_i16_lo:
+; GFX9: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_i16_lo(i16* %arg, <2 x i16>* %out) {
+ %gep = getelementptr inbounds i16, i16* %arg, i32 4
+ %ld = load i16, i16* %gep, align 2
+ %vec = insertelement <2 x i16> <i16 undef, i16 0>, i16 %ld, i32 0
+ %v = add <2 x i16> %vec, %vec
+ store <2 x i16> %v, <2 x i16>* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: load_i16_hi:
+; GFX9: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_i16_hi(i16* %arg, <2 x i16>* %out) {
+ %gep = getelementptr inbounds i16, i16* %arg, i32 4
+ %ld = load i16, i16* %gep, align 2
+ %vec = insertelement <2 x i16> <i16 0, i16 undef>, i16 %ld, i32 1
+ %v = add <2 x i16> %vec, %vec
+ store <2 x i16> %v, <2 x i16>* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: load_half_lo:
+; GFX9: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_half_lo(half* %arg, <2 x half>* %out) {
+ %gep = getelementptr inbounds half, half* %arg, i32 4
+ %ld = load half, half* %gep, align 2
+ %vec = insertelement <2 x half> <half undef, half 0xH0000>, half %ld, i32 0
+ %v = fadd <2 x half> %vec, %vec
+ store <2 x half> %v, <2 x half>* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: load_half_hi:
+; GFX9: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
+; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_half_hi(half* %arg, <2 x half>* %out) {
+ %gep = getelementptr inbounds half, half* %arg, i32 4
+ %ld = load half, half* %gep, align 2
+ %vec = insertelement <2 x half> <half 0xH0000, half undef>, half %ld, i32 1
+ %v = fadd <2 x half> %vec, %vec
+ store <2 x half> %v, <2 x half>* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: load_float_lo:
+; GFX9: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:16{{$}}
+; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
+define amdgpu_kernel void @load_float_lo(float* %arg, float* %out) {
+ %gep = getelementptr inbounds float, float* %arg, i32 4
+ %ld = load float, float* %gep, align 4
+ %v = fadd float %ld, %ld
+ store float %v, float* %out, align 4
+ ret void
+}
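The GFX9/GFX10 check lines in this new test encode the expected lowering difference: with a generic (flat) pointer, gfx900 may fold the small byte offset into the flat instruction itself, while gfx1010 must emit the access with no offset field (hence the {{$}} anchors) and apply the +4 to the address instead; accesses through a global pointer keep the offset on both targets. A minimal sketch of how the two memory accesses in flat_inst_offset are expected to differ, with placeholder register numbers (the checks only match register patterns, not specific registers) and the add of 1 between load and store omitted:

  ; gfx900: the 4-byte offset is encoded on the flat instructions
  flat_load_dword  v2, v[0:1] offset:4
  flat_store_dword v[0:1], v2 offset:4

  ; gfx1010: no immediate offset on flat_* with a generic pointer, so the
  ; +4 is folded into the address computation before the access
  flat_load_dword  v2, v[0:1]
  flat_store_dword v[0:1], v2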
Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir Tue Apr 30 15:08:23 2019
@@ -10,12 +10,12 @@ body: |
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr0_sgpr1
- %2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0
+ %2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0, 0
%3:sreg_32_xm0 = S_MOV_B32 2
%4:vgpr_32 = V_LSHLREV_B32_e64 killed %3, %0, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%6:vreg_64 = REG_SEQUENCE killed %4, %subreg.sub0, killed %5, %subreg.sub1
- %7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, 0, implicit $exec
%8:sreg_32_xm0 = S_MOV_B32 65535
%9:vgpr_32 = COPY %8
%10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir Tue Apr 30 15:08:23 2019
@@ -158,10 +158,10 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = V_MOV_B32_e32 1065353216, implicit $exec
%13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -222,13 +222,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 1065353216, implicit $exec
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -289,14 +289,14 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 1065353216, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
%16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -360,16 +360,16 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 1065353216, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -427,13 +427,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 1, implicit $exec
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -494,16 +494,16 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 -2, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -564,13 +564,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 15360, implicit $exec
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
- BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -631,13 +631,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = V_MOV_B32_e32 80886784, implicit $exec
%14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -697,13 +697,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = V_MOV_B32_e32 305413120, implicit $exec
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir Tue Apr 30 15:08:23 2019
@@ -46,9 +46,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
- %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
@@ -60,13 +60,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
- %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
- %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
+ %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec
%26 = COPY %29
- BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -117,9 +117,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
- %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
@@ -131,13 +131,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
- %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
- %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
+ %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $exec
%26 = COPY %29
- BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -188,9 +188,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
- %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
@@ -202,13 +202,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
- %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
- %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
+ %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec
%26 = COPY %29
- BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -259,9 +259,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
- %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
@@ -273,13 +273,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
- %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
- %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
+ %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $exec
%26 = COPY %29
- BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir Tue Apr 30 15:08:23 2019
@@ -34,7 +34,7 @@ body: |
%3 = S_LSHL_B32 %1, killed %1, implicit-def dead $scc
%4 = V_AND_B32_e64 killed %2, killed %3, implicit $exec
%5 = IMPLICIT_DEF
- BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir Tue Apr 30 15:08:23 2019
@@ -93,7 +93,7 @@ body: |
%1:sgpr_64 = COPY $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 )
+ %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 )
%5:sreg_32_xm0 = S_MOV_B32 2
%6:vgpr_32 = V_LSHLREV_B32_e64 killed %5, %0, implicit $exec
%7:sreg_32_xm0 = S_MOV_B32 0
@@ -109,140 +109,140 @@ body: |
%16:vreg_64 = REG_SEQUENCE %17, %subreg.sub0, %18, %subreg.sub1
%11:vreg_64 = COPY %16
- %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, implicit $exec :: (load 4)
- GLOBAL_STORE_DWORD %11, %10, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %40:vreg_64 = GLOBAL_LOAD_DWORDX2 %11, 16, 0, 0, implicit $exec :: (load 4)
- GLOBAL_STORE_DWORDX2 %11, %40, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %41:vreg_96 = GLOBAL_LOAD_DWORDX3 %11, 16, 0, 0, implicit $exec :: (load 4)
- GLOBAL_STORE_DWORDX3 %11, %41, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %42:vreg_128 = GLOBAL_LOAD_DWORDX4 %11, 16, 0, 0, implicit $exec :: (load 4)
- GLOBAL_STORE_DWORDX4 %11, %42, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %43:vgpr_32 = GLOBAL_LOAD_SSHORT %11, 16, 0, 0, implicit $exec :: (load 4)
- GLOBAL_STORE_SHORT %11, %43, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %44:vgpr_32 = GLOBAL_LOAD_USHORT %11, 16, 0, 0, implicit $exec :: (load 4)
- GLOBAL_STORE_SHORT %11, %44, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %45:vgpr_32 = GLOBAL_LOAD_UBYTE %11, 16, 0, 0, implicit $exec :: (load 4)
- GLOBAL_STORE_BYTE %11, %45, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %46:vgpr_32 = GLOBAL_LOAD_SBYTE %11, 16, 0, 0, implicit $exec :: (load 4)
- GLOBAL_STORE_BYTE %11, %46, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %47:vgpr_32 = GLOBAL_LOAD_SBYTE_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4)
- GLOBAL_STORE_BYTE_D16_HI %11, %47, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %48:vgpr_32 = GLOBAL_LOAD_UBYTE_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4)
- GLOBAL_STORE_BYTE_D16_HI %11, %48, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %49:vgpr_32 = GLOBAL_LOAD_SBYTE_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4)
- GLOBAL_STORE_BYTE_D16_HI %11, %49, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %50:vgpr_32 = GLOBAL_LOAD_UBYTE_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4)
- GLOBAL_STORE_BYTE_D16_HI %11, %50, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %51:vgpr_32 = GLOBAL_LOAD_SHORT_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4)
- GLOBAL_STORE_SHORT_D16_HI %11, %51, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
- %52:vgpr_32 = GLOBAL_LOAD_SHORT_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4)
- GLOBAL_STORE_SHORT_D16_HI %11, %52, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+ GLOBAL_STORE_DWORD %11, %10, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %40:vreg_64 = GLOBAL_LOAD_DWORDX2 %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+ GLOBAL_STORE_DWORDX2 %11, %40, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %41:vreg_96 = GLOBAL_LOAD_DWORDX3 %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+ GLOBAL_STORE_DWORDX3 %11, %41, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %42:vreg_128 = GLOBAL_LOAD_DWORDX4 %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+ GLOBAL_STORE_DWORDX4 %11, %42, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %43:vgpr_32 = GLOBAL_LOAD_SSHORT %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+ GLOBAL_STORE_SHORT %11, %43, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %44:vgpr_32 = GLOBAL_LOAD_USHORT %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+ GLOBAL_STORE_SHORT %11, %44, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %45:vgpr_32 = GLOBAL_LOAD_UBYTE %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+ GLOBAL_STORE_BYTE %11, %45, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %46:vgpr_32 = GLOBAL_LOAD_SBYTE %11, 16, 0, 0, 0, implicit $exec :: (load 4)
+ GLOBAL_STORE_BYTE %11, %46, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %47:vgpr_32 = GLOBAL_LOAD_SBYTE_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+ GLOBAL_STORE_BYTE_D16_HI %11, %47, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %48:vgpr_32 = GLOBAL_LOAD_UBYTE_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+ GLOBAL_STORE_BYTE_D16_HI %11, %48, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %49:vgpr_32 = GLOBAL_LOAD_SBYTE_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+ GLOBAL_STORE_BYTE_D16_HI %11, %49, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %50:vgpr_32 = GLOBAL_LOAD_UBYTE_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+ GLOBAL_STORE_BYTE_D16_HI %11, %50, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %51:vgpr_32 = GLOBAL_LOAD_SHORT_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+ GLOBAL_STORE_SHORT_D16_HI %11, %51, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ %52:vgpr_32 = GLOBAL_LOAD_SHORT_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4)
+ GLOBAL_STORE_SHORT_D16_HI %11, %52, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
%53:vgpr_32 = GLOBAL_ATOMIC_XOR_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %53, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %53, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_XOR %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%54:vgpr_32 = GLOBAL_ATOMIC_SMIN_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %54, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %54, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_SMIN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%55:vgpr_32 = GLOBAL_ATOMIC_AND_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %55, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %55, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_AND %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%56:vgpr_32 = GLOBAL_ATOMIC_SWAP_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %56, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %56, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_SWAP %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%57:vgpr_32 = GLOBAL_ATOMIC_SMAX_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %57, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %57, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_SMAX %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%58:vgpr_32 = GLOBAL_ATOMIC_UMIN_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %58, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %58, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_UMIN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%59:vgpr_32 = GLOBAL_ATOMIC_UMAX_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %59, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %59, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_UMAX %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%60:vgpr_32 = GLOBAL_ATOMIC_OR_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %60, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %60, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_OR %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%61:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %61, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %61, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_ADD %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%62:vgpr_32 = GLOBAL_ATOMIC_SUB_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %62, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %62, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_SUB %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%63:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %63, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %63, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_CMPSWAP %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%64:vgpr_32 = GLOBAL_ATOMIC_INC_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %64, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %64, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_INC %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%65:vgpr_32 = GLOBAL_ATOMIC_DEC_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORD %11, %65, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORD %11, %65, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_DEC %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%66:vreg_64 = GLOBAL_ATOMIC_OR_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %66, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %66, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_OR_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%67:vreg_64 = GLOBAL_ATOMIC_XOR_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %67, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %67, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_XOR_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%68:vreg_64 = GLOBAL_ATOMIC_AND_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %68, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %68, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_AND_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%69:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %69, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %69, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_ADD_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%70:vreg_64 = GLOBAL_ATOMIC_SUB_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %70, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %70, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_SUB_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%71:vreg_64 = GLOBAL_ATOMIC_DEC_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %71, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %71, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_DEC_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%72:vreg_64 = GLOBAL_ATOMIC_INC_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %72, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %72, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_INC_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%73:vreg_64 = GLOBAL_ATOMIC_SMIN_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %73, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %73, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_SMIN_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%74:vreg_64 = GLOBAL_ATOMIC_SWAP_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %74, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %74, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_SWAP_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%75:vreg_64 = GLOBAL_ATOMIC_SMAX_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %75, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %75, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_SMAX_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%76:vreg_64 = GLOBAL_ATOMIC_UMIN_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %76, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %76, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_UMIN_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%77:vreg_64 = GLOBAL_ATOMIC_UMAX_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %77, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %77, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_UMAX_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
%79:sreg_128 = REG_SEQUENCE %4, %subreg.sub0, %4, %subreg.sub1, %4, %subreg.sub2, %4, %subreg.sub3
%80:vreg_128 = COPY %79
%78:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN %11, %80, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
- GLOBAL_STORE_DWORDX2 %11, %78, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %11, %78, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
GLOBAL_ATOMIC_CMPSWAP_X2 %11, %80, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1)
S_ENDPGM 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir Tue Apr 30 15:08:23 2019
@@ -12,7 +12,7 @@ body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9, $vgpr10
- BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, 0, implicit $exec
$vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec
S_ENDPGM 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir Tue Apr 30 15:08:23 2019
@@ -16,7 +16,7 @@ name: hazard-inlineasm
body: |
bb.0:
- FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
INLINEASM &"v_mad_u64_u32 $0, $1, $2, $3, $4", 0, 2621450, def $vgpr26_vgpr27, 2818058, def dead $sgpr14_sgpr15, 589833, $sgpr12, 327689, killed $vgpr51, 2621449, $vgpr46_vgpr47
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir Tue Apr 30 15:08:23 2019
@@ -21,7 +21,7 @@ body: |
liveins: $sgpr2, $sgpr3, $sgpr4
$sgpr6 = S_MOV_B32 killed $sgpr3
- renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0
+ renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0, 0
$m0 = S_MOV_B32 killed renamable $sgpr4
dead renamable $sgpr0 = KILL undef renamable $sgpr2
renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $m0, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll Tue Apr 30 15:08:23 2019
@@ -12,7 +12,7 @@ define amdgpu_kernel void @extract_w_off
; GCN: bb.0.entry:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0, $sgpr0_sgpr1
- ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
+ ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
; GCN: renamable $sgpr2 = COPY renamable $sgpr1
; GCN: renamable $sgpr4 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
; GCN: renamable $sgpr5 = S_MOV_B32 61440
@@ -101,7 +101,7 @@ define amdgpu_kernel void @extract_w_off
; GCN: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1
; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
; GCN: $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
; GCN: S_ENDPGM
entry:
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir Tue Apr 30 15:08:23 2019
@@ -49,10 +49,10 @@ body: |
bb.0 (%ir-block.2):
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec
$vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec
$vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir Tue Apr 30 15:08:23 2019
@@ -230,28 +230,28 @@ name: vmem_gt_8dw_store
body: |
bb.0:
- BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.1
bb.1:
- FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
@@ -549,14 +549,14 @@ body: |
$flat_scr_lo = S_ADD_U32 $sgpr6, $sgpr9, implicit-def $scc
$flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc
DBG_VALUE $noreg, 2, !5, !11, debug-location !12
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5
$sgpr8 = S_MOV_B32 $sgpr5
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
- BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
$sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
- BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir Tue Apr 30 15:08:23 2019
@@ -55,7 +55,7 @@ body: |
bb.0.entry:
liveins: $sgpr0_sgpr1
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
@@ -64,7 +64,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
S_BRANCH %bb.3
@@ -72,7 +72,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
bb.3.done:
@@ -80,7 +80,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir Tue Apr 30 15:08:23 2019
@@ -57,15 +57,15 @@ body: |
%4.sub1 = COPY %3.sub0
undef %5.sub0 = COPY %4.sub1
%5.sub1 = COPY %4.sub0
- FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%6 = IMPLICIT_DEF
undef %7.sub0_sub1 = COPY %6
%7.sub2 = COPY %3.sub0
- FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%8 = IMPLICIT_DEF
undef %9.sub0_sub1_sub2 = COPY %8
%9.sub3 = COPY %3.sub0
- FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
...
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll?rev=359621&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll Tue Apr 30 15:08:23 2019
@@ -0,0 +1,19 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+
+declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup() #0
+
+; GCN-LABEL: {{^}}test_s_get_waveid_in_workgroup:
+; GFX10: global_store_dword
+; GFX10: s_get_waveid_in_workgroup [[DEST:s[0-9]+]]
+; GFX10: s_waitcnt lgkmcnt(0)
+; GFX10: v_mov_b32_e32 [[VDEST:v[0-9]+]], [[DEST]]
+; GFX10: global_store_dword v[{{[0-9:]+}}], [[VDEST]], off
+define amdgpu_kernel void @test_s_get_waveid_in_workgroup(i32 addrspace(1)* %out) {
+; Make sure %out is loaded and the associated wait count is already inserted
+ store i32 0, i32 addrspace(1)* %out
+ %v = call i32 @llvm.amdgcn.s.get.waveid.in.workgroup()
+ store i32 %v, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir Tue Apr 30 15:08:23 2019
@@ -80,13 +80,13 @@ body: |
successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000)
liveins: $vgpr0, $sgpr0_sgpr1
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$vgpr1 = V_ASHRREV_I32_e32 31, $vgpr0, implicit $exec
$vgpr1_vgpr2 = V_LSHL_B64 $vgpr0_vgpr1, 3, implicit $exec
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 0
S_WAITCNT 127
- $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
+ $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
$vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -97,7 +97,7 @@ body: |
successors: %bb.2.exit(0x80000000)
liveins: $sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr1_vgpr2_vgpr3_vgpr4:0x00000003
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
dead $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
dead $vgpr0 = V_MOV_B32_e32 61440, implicit $exec
$sgpr4_sgpr5 = S_MOV_B64 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir Tue Apr 30 15:08:23 2019
@@ -11,10 +11,10 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
- renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
+ renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -30,7 +30,7 @@ body: |
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir Tue Apr 30 15:08:23 2019
@@ -13,14 +13,14 @@
name: load_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -37,14 +37,14 @@ body: |
name: load_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -61,14 +61,14 @@ body: |
name: load_singlethread_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -85,14 +85,14 @@ body: |
name: load_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -109,14 +109,14 @@ body: |
name: load_wavefront_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -133,14 +133,14 @@ body: |
name: load_wavefront_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -157,14 +157,14 @@ body: |
name: load_wavefront_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -181,14 +181,14 @@ body: |
name: load_wavefront_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -205,14 +205,14 @@ body: |
name: load_workgroup_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -229,14 +229,14 @@ body: |
name: load_workgroup_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -253,14 +253,14 @@ body: |
name: load_workgroup_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -277,14 +277,14 @@ body: |
name: load_workgroup_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -301,14 +301,14 @@ body: |
name: load_agent_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -325,14 +325,14 @@ body: |
name: load_agent_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -349,14 +349,14 @@ body: |
name: load_agent_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -373,14 +373,14 @@ body: |
name: load_agent_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -397,14 +397,14 @@ body: |
name: load_system_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -421,14 +421,14 @@ body: |
name: load_system_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -445,14 +445,14 @@ body: |
name: load_system_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -469,14 +469,14 @@ body: |
name: load_system_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -493,8 +493,8 @@ body: |
name: store_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -515,8 +515,8 @@ body: |
name: store_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -537,8 +537,8 @@ body: |
name: store_singlethread_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -559,8 +559,8 @@ body: |
name: store_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -581,8 +581,8 @@ body: |
name: store_wavefront_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -603,8 +603,8 @@ body: |
name: store_wavefront_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -625,8 +625,8 @@ body: |
name: store_wavefront_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -647,8 +647,8 @@ body: |
name: store_wavefront_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -669,8 +669,8 @@ body: |
name: store_workgroup_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -691,8 +691,8 @@ body: |
name: store_workgroup_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -713,8 +713,8 @@ body: |
name: store_workgroup_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -735,8 +735,8 @@ body: |
name: store_workgroup_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -757,8 +757,8 @@ body: |
name: store_agent_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -779,8 +779,8 @@ body: |
name: store_agent_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -801,8 +801,8 @@ body: |
name: store_agent_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -823,8 +823,8 @@ body: |
name: store_agent_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -845,8 +845,8 @@ body: |
name: store_system_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -867,8 +867,8 @@ body: |
name: store_system_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -889,8 +889,8 @@ body: |
name: store_system_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -911,8 +911,8 @@ body: |
name: store_system_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -933,8 +933,8 @@ body: |
name: atomicrmw_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -955,8 +955,8 @@ body: |
name: atomicrmw_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -977,8 +977,8 @@ body: |
name: atomicrmw_singlethread_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -999,8 +999,8 @@ body: |
name: atomicrmw_singlethread_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1021,8 +1021,8 @@ body: |
name: atomicrmw_singlethread_acq_rel
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1043,8 +1043,8 @@ body: |
name: atomicrmw_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir Tue Apr 30 15:08:23 2019
@@ -16,27 +16,27 @@ body: |
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $sgpr0_sgpr1, $sgpr3
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
S_WAITCNT 127
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
+ BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
S_CBRANCH_SCC0 %bb.1, implicit killed $scc
bb.2:
successors: %bb.3(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 32772, implicit $exec
S_BRANCH %bb.3
@@ -45,7 +45,7 @@ body: |
successors: %bb.3(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
@@ -55,11 +55,11 @@ body: |
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952
- FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`)
+ FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir Tue Apr 30 15:08:23 2019
@@ -62,7 +62,7 @@
# CHECK-LABEL: name: multiple_mem_operands
# CHECK-LABEL: bb.3.done:
-# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 1, 1, 0
+# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 1, 1, 0, 0
name: multiple_mem_operands
alignment: 0
@@ -110,27 +110,27 @@ body: |
successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
liveins: $sgpr0_sgpr1, $sgpr3
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
S_WAITCNT 127
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
+ BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
bb.2.else:
successors: %bb.3.done(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 32772, implicit $exec
S_BRANCH %bb.3.done
@@ -139,7 +139,7 @@ body: |
successors: %bb.3.done(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
@@ -149,11 +149,11 @@ body: |
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952
- FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
+ FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir Tue Apr 30 15:08:23 2019
@@ -42,7 +42,7 @@
# CHECK-LABEL: name: multiple_mem_operands
# CHECK-LABEL: bb.3.done:
-# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0
+# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0
name: multiple_mem_operands
alignment: 0
@@ -90,27 +90,27 @@ body: |
successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
liveins: $sgpr0_sgpr1, $sgpr3
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
S_WAITCNT 127
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
+ BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
bb.2.else:
successors: %bb.3.done(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 32772, implicit $exec
S_BRANCH %bb.3.done
@@ -119,7 +119,7 @@ body: |
successors: %bb.3.done(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
@@ -129,11 +129,11 @@ body: |
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952
- FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
+ FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir Tue Apr 30 15:08:23 2019
@@ -13,14 +13,14 @@
name: load_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -37,14 +37,14 @@ body: |
name: load_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -61,14 +61,14 @@ body: |
name: load_singlethread_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -85,14 +85,14 @@ body: |
name: load_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -109,14 +109,14 @@ body: |
name: load_wavefront_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -133,14 +133,14 @@ body: |
name: load_wavefront_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -157,14 +157,14 @@ body: |
name: load_wavefront_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -181,14 +181,14 @@ body: |
name: load_wavefront_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -205,14 +205,14 @@ body: |
name: load_workgroup_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -229,14 +229,14 @@ body: |
name: load_workgroup_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -253,14 +253,14 @@ body: |
name: load_workgroup_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -277,14 +277,14 @@ body: |
name: load_workgroup_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -301,14 +301,14 @@ body: |
name: load_agent_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -325,14 +325,14 @@ body: |
name: load_agent_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -349,14 +349,14 @@ body: |
name: load_agent_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -373,14 +373,14 @@ body: |
name: load_agent_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -397,14 +397,14 @@ body: |
name: load_system_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -421,14 +421,14 @@ body: |
name: load_system_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -445,14 +445,14 @@ body: |
name: load_system_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -469,14 +469,14 @@ body: |
name: load_system_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -493,8 +493,8 @@ body: |
name: store_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -515,8 +515,8 @@ body: |
name: store_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -537,8 +537,8 @@ body: |
name: store_singlethread_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -559,8 +559,8 @@ body: |
name: store_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -581,8 +581,8 @@ body: |
name: store_wavefront_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -603,8 +603,8 @@ body: |
name: store_wavefront_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -625,8 +625,8 @@ body: |
name: store_wavefront_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -647,8 +647,8 @@ body: |
name: store_wavefront_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -669,8 +669,8 @@ body: |
name: store_workgroup_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -691,8 +691,8 @@ body: |
name: store_workgroup_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -713,8 +713,8 @@ body: |
name: store_workgroup_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -735,8 +735,8 @@ body: |
name: store_workgroup_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -757,13 +757,14 @@ body: |
name: store_agent_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
+
...
---
@@ -778,8 +779,8 @@ body: |
name: store_agent_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -800,8 +801,8 @@ body: |
name: store_agent_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -822,8 +823,8 @@ body: |
name: store_agent_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -844,8 +845,8 @@ body: |
name: store_system_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -866,8 +867,8 @@ body: |
name: store_system_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -888,8 +889,8 @@ body: |
name: store_system_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -910,8 +911,8 @@ body: |
name: store_system_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -932,8 +933,8 @@ body: |
name: atomicrmw_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -954,8 +955,8 @@ body: |
name: atomicrmw_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -976,8 +977,8 @@ body: |
name: atomicrmw_singlethread_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -998,8 +999,8 @@ body: |
name: atomicrmw_singlethread_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1020,8 +1021,8 @@ body: |
name: atomicrmw_singlethread_acq_rel
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1042,8 +1043,8 @@ body: |
name: atomicrmw_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir Tue Apr 30 15:08:23 2019
@@ -2,12 +2,12 @@
# GCN-LABEL: {{^}}name: vector_clause{{$}}
# GCN: early-clobber %2:vreg_128, early-clobber %4:vreg_128, early-clobber %1:vreg_128, early-clobber %3:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
+# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
# GCN-NEXT: }
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
---
name: vector_clause
@@ -21,24 +21,24 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_full{{$}}
# GCN: early-clobber %1:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec
-# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec
-# GCN-NEXT: internal %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
-# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
+# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: internal %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
# GCN-NEXT: }
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
---
name: subreg_full
@@ -49,20 +49,20 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec
- %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec
- %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
- %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec
+ undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
+ %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
+ %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
+ %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_part{{$}}
# GCN: undef early-clobber %1.sub0_sub1:vreg_128, undef early-clobber %1.sub3:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec
-# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec
-# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
+# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
# GCN-NEXT: }
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
---
name: subreg_part
@@ -73,18 +73,18 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec
- %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec
- %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec
+ undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
+ %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
+ %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: dead{{$}}
# GCN: dead early-clobber %2:vreg_128, dead early-clobber %4:vreg_128, dead early-clobber %1:vreg_128, dead early-clobber %3:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
+# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
# GCN-NEXT: }
---
@@ -99,18 +99,18 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
- dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
- dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
- dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
+ dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+ dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+ dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+ dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_dead{{$}}
# GCN: early-clobber %1:vreg_64 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec
-# GCN-NEXT: dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec
+# GCN-NEXT: %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
# GCN-NEXT: }
-# GCN-NEXT: GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, 0, implicit $exec
---
name: subreg_dead
@@ -121,15 +121,15 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- undef %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec
- dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, implicit $exec
+ undef %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
+ dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: kill{{$}}
# GCN: early-clobber %2:vreg_128, early-clobber %3:vreg_128 = BUNDLE %0, %1, implicit $exec {
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: }
---
@@ -144,17 +144,17 @@ body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: indirect{{$}}
-# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, implicit $exec
+# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: early-clobber %2:vreg_128, early-clobber %3:vreg_128 = BUNDLE %1, implicit $exec {
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, implicit $exec
-# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: }
---
@@ -168,18 +168,18 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, implicit $exec
+ %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: stack{{$}}
# GCN: %0:vreg_64 = IMPLICIT_DEF
-# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, implicit $exec
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec
+# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
---
name: stack
@@ -193,33 +193,33 @@ stack:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: overflow_counter{{$}}
# GCN: dead early-clobber %7:vgpr_32, dead early-clobber %14:vgpr_32, dead early-clobber %2:vgpr_32, dead early-clobber %9:vgpr_32, dead early-clobber %4:vgpr_32, dead early-clobber %11:vgpr_32, dead early-clobber %6:vgpr_32, dead early-clobber %13:vgpr_32, dead early-clobber %1:vgpr_32, dead early-clobber %8:vgpr_32, dead early-clobber %15:vgpr_32, dead early-clobber %3:vgpr_32, dead early-clobber %10:vgpr_32, dead early-clobber %5:vgpr_32, dead early-clobber %12:vgpr_32 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: dead %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, implicit $exec
-# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec
-# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, implicit $exec
-# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, implicit $exec
-# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, implicit $exec
-# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec
-# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, implicit $exec
-# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, implicit $exec
-# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, implicit $exec
-# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, implicit $exec
-# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, implicit $exec
-# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, implicit $exec
+# GCN-NEXT: dead %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, implicit $exec
# GCN-NEXT: }
# GCN-NEXT: dead early-clobber %16:vgpr_32, dead early-clobber %17:vgpr_32 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, implicit $exec
-# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, implicit $exec
+# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, 0, implicit $exec
# GCN-NEXT: }
---
@@ -247,36 +247,36 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, implicit $exec
- %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, implicit $exec
- %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, implicit $exec
- %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, implicit $exec
- %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec
- %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, implicit $exec
- %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, implicit $exec
- %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, implicit $exec
- %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec
- %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, implicit $exec
- %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, implicit $exec
- %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, implicit $exec
- %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, implicit $exec
- %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, implicit $exec
- %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, implicit $exec
- %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, implicit $exec
- %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, implicit $exec
+ %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
+ %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec
+ %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, implicit $exec
+ %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
+ %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, implicit $exec
+ %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, implicit $exec
+ %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
+ %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, implicit $exec
+ %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, implicit $exec
+ %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, implicit $exec
+ %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, implicit $exec
+ %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, implicit $exec
+ %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, implicit $exec
+ %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, 0, implicit $exec
+ %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: reg_pressure{{$}}
# GCN: dead early-clobber %2:vreg_128, dead early-clobber %4:vreg_128, dead early-clobber %1:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
-# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, implicit $exec
+# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, implicit $exec
# GCN-NEXT: }
# GCN-NEXT: dead early-clobber %7:vreg_128, dead early-clobber %6:vreg_128 = BUNDLE %0, implicit $exec {
-# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, implicit $exec
-# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, implicit $exec
+# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, 0, implicit $exec
# GCN-NEXT: }
---
@@ -294,13 +294,13 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec
- %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, implicit $exec
- %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, implicit $exec
- %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
+ %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, implicit $exec
+ %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, 0, implicit $exec
+ %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: image_clause{{$}}
@@ -336,8 +336,8 @@ body: |
# GCN-LABEL: {{^}}name: mixed_clause{{$}}
# GCN: dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: }
---
@@ -356,8 +356,8 @@ body: |
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
%3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
- %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: atomic{{$}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir Tue Apr 30 15:08:23 2019
@@ -23,7 +23,7 @@
# COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
# COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
# COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
+# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
# COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# COMMON-LABEL bb.2:
@@ -47,7 +47,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -69,7 +69,7 @@ body: |
# COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
# COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
# COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
+# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
# COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# COMMON-LABEL bb.2:
@@ -93,7 +93,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -115,7 +115,7 @@ body: |
# COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
# COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
# COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
+# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
# COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# COMMON-LABEL bb.2:
@@ -139,7 +139,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -155,7 +155,7 @@ body: |
# COMMON: %9:vgpr_32 = V_ADD_I32_e32 %12.sub0, %4.sub0, implicit-def $vcc, implicit $exec
# COMMON: %10:vgpr_32 = V_ADDC_U32_e32 %12.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
# COMMON: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1
-# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %16, 0, 0, 0, 0, 0, implicit $exec
+# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
---
name: addr64
liveins:
@@ -175,7 +175,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -198,7 +198,7 @@ body: |
# NO-ADDR64: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
# NO-ADDR64: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
# NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
+# NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
# NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# NO-ADDR64-LABEL bb.2:
@@ -211,7 +211,7 @@ body: |
# ADDR64: [[RSRCFMTHI:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440
# ADDR64: [[ZERORSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[ZERO64]], %subreg.sub0_sub1, [[RSRCFMTLO]], %subreg.sub2, [[RSRCFMTHI]], %subreg.sub3
# ADDR64: [[VADDR64:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[RSRCPTR]].sub0, %subreg.sub0, [[RSRCPTR]].sub1, %subreg.sub1
-# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, implicit $exec
+# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
---
name: offset
@@ -232,7 +232,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
Modified: llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir Tue Apr 30 15:08:23 2019
@@ -105,8 +105,8 @@ body: |
%3 = COPY $sgpr0_sgpr1
%2 = COPY $vgpr0
- %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
- %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
+ %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
+ %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
%6 = COPY %7
%9 = S_MOV_B32 0
%10 = REG_SEQUENCE %2, %subreg.sub0, killed %9, %subreg.sub1
@@ -137,7 +137,7 @@ body: |
%28 = REG_SEQUENCE %6, 17, killed %27, 18
%29 = V_MOV_B32_e32 0, implicit $exec
%30 = COPY %24
- BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.bb2:
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -203,9 +203,9 @@ body: |
%3 = COPY $sgpr0_sgpr1
%2 = COPY $vgpr0
- %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
- %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
- %9 = S_LOAD_DWORDX2_IMM %3, 13, 0
+ %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
+ %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
+ %9 = S_LOAD_DWORDX2_IMM %3, 13, 0, 0
%6 = COPY %7
%10 = S_MOV_B32 0
%11 = REG_SEQUENCE %2, %subreg.sub0, killed %10, %subreg.sub1
@@ -243,7 +243,7 @@ body: |
%37 = REG_SEQUENCE %6, 17, killed %36, 18
%38 = V_MOV_B32_e32 0, implicit $exec
%39 = COPY %33
- BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.bb2:
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -300,8 +300,8 @@ body: |
%3 = COPY $sgpr0_sgpr1
%2 = COPY $vgpr0
- %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
- %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
+ %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
+ %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
%6 = COPY %7
%9 = S_MOV_B32 0
%10 = REG_SEQUENCE %2, %subreg.sub0, killed %9, %subreg.sub1
@@ -332,7 +332,7 @@ body: |
%28 = REG_SEQUENCE %6, 17, killed %27, 18
%29 = V_MOV_B32_e32 0, implicit $exec
%30 = COPY %24
- BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.bb2:
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir Tue Apr 30 15:08:23 2019
@@ -151,7 +151,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -159,7 +159,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -188,7 +188,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -196,7 +196,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -225,7 +225,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -233,14 +233,14 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
-# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
@@ -255,7 +255,7 @@ body: |
$vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
- BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
SI_MASK_BRANCH %bb.2, implicit $exec
@@ -266,7 +266,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -274,7 +274,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -304,7 +304,7 @@ body: |
bb.1.if:
liveins: $sgpr0_sgpr1 , $sgpr4_sgpr5_sgpr6_sgpr7
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7
@@ -312,7 +312,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -346,7 +346,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -356,7 +356,7 @@ body: |
$sgpr1 = S_MOV_B32 1
$sgpr2 = S_MOV_B32 -1
$sgpr3 = S_MOV_B32 61440
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -387,7 +387,7 @@ body: |
S_SLEEP 0, implicit $sgpr2_sgpr3
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -395,7 +395,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -426,7 +426,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -434,7 +434,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -463,7 +463,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -471,7 +471,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -500,7 +500,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -508,7 +508,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -539,7 +539,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -547,6 +547,6 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir Tue Apr 30 15:08:23 2019
@@ -27,12 +27,12 @@ body: |
; CHECK: liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr5 = COPY $sgpr4
; CHECK: $sgpr6 = S_ADD_U32 $sgpr5, 524288, implicit-def $scc
- ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
+ ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
; CHECK: S_BRANCH %bb.1
; CHECK: bb.1:
; CHECK: liveins: $sgpr5, $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr4 = S_ADD_U32 $sgpr5, 524288, implicit-def $scc
- ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
+ ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
; CHECK: S_ENDPGM 0, implicit $vgpr0
bb.0:
$vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir Tue Apr 30 15:08:23 2019
@@ -8,7 +8,7 @@ name: diffoporder_add
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -34,12 +34,12 @@ body: |
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 6144
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
...
---
@@ -61,7 +61,7 @@ name: LowestInMiddle
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -87,17 +87,17 @@ body: |
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 6400
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
%39:sgpr_32 = S_MOV_B32 11200
%40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec
%42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
%44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
- %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec
+ %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec
...
---
@@ -114,7 +114,7 @@ name: NegativeDistance
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -140,17 +140,17 @@ body: |
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 8192
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
%39:sgpr_32 = S_MOV_B32 10240
%40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec
%42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
%44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
- %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec
+ %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec
...
---
@@ -159,7 +159,7 @@ name: assert_hit
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -186,5 +186,5 @@ body: |
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir Tue Apr 30 15:08:23 2019
@@ -22,7 +22,7 @@ body: |
$sgpr10 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
$sgpr11 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
- $sgpr10 = S_LOAD_DWORD_IMM killed $sgpr10_sgpr11, 0, 0
+ $sgpr10 = S_LOAD_DWORD_IMM killed $sgpr10_sgpr11, 0, 0, 0
S_WAITCNT 127
$vgpr0 = V_XOR_B32_e32 killed $sgpr10, killed $vgpr0, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir Tue Apr 30 15:08:23 2019
@@ -185,9 +185,9 @@ body: |
bb.28:
%9 = S_FF1_I32_B32 undef %10
%13 = V_MAD_U32_U24 killed %9, 48, 32, 0, implicit $exec
- %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
%46 = V_AND_B32_e32 1, killed %45, implicit $exec
- %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0 :: (dereferenceable invariant load 4)
+ %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0, 0 :: (dereferenceable invariant load 4)
%25 = V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $exec
%26 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %25, implicit $exec
%62 = IMPLICIT_DEF
Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir Tue Apr 30 15:08:23 2019
@@ -96,14 +96,14 @@ body: |
%0.sub0 = COPY killed %12
%21 = COPY killed %18
%21.sub0 = COPY killed %15
- %22 = S_LOAD_DWORD_IMM killed %21, 2, 0
+ %22 = S_LOAD_DWORD_IMM killed %21, 2, 0, 0
%23 = S_MOV_B32 491436
undef %24.sub0 = COPY killed %22
%24.sub1 = COPY killed %23
- %25 = S_LOAD_DWORDX4_IMM killed %24, 0, 0
+ %25 = S_LOAD_DWORDX4_IMM killed %24, 0, 0, 0
%1 = COPY killed %25
- %26 = S_LOAD_DWORDX2_IMM %0, 2, 0
- dead %27 = S_LOAD_DWORD_IMM killed %26, 0, 0
+ %26 = S_LOAD_DWORDX2_IMM %0, 2, 0, 0
+ dead %27 = S_LOAD_DWORD_IMM killed %26, 0, 0, 0
S_CBRANCH_SCC0 %bb.1, implicit undef $scc
bb.5:
@@ -129,8 +129,8 @@ body: |
bb.2:
%4 = COPY killed %59
%3 = COPY killed %58
- %39 = S_LOAD_DWORDX2_IMM killed %0, 6, 0
- %40 = S_LOAD_DWORD_IMM killed %39, 0, 0
+ %39 = S_LOAD_DWORDX2_IMM killed %0, 6, 0, 0
+ %40 = S_LOAD_DWORD_IMM killed %39, 0, 0, 0
%43 = V_MOV_B32_e32 -1102263091, implicit $exec
%60 = COPY killed %4
%61 = COPY killed %3
Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir Tue Apr 30 15:08:23 2019
@@ -56,8 +56,8 @@ body: |
%3 = COPY killed $vgpr0
%0 = COPY killed $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORD_IMM killed %0, 13, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORD_IMM killed %0, 13, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
%18 = V_ASHRREV_I32_e32 31, %3, implicit $exec
undef %19.sub0 = COPY killed %3
%19.sub1 = COPY killed %18
@@ -70,7 +70,7 @@ body: |
%13.sub2_sub3 = COPY killed %12
%20 = V_LSHL_B64 killed %19, 2, implicit $exec
%16 = COPY killed %5
- BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+ BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir Tue Apr 30 15:08:23 2019
@@ -16,7 +16,7 @@ body: |
%23:vgpr_32 = V_CVT_U32_F32_e32 killed %21, implicit $exec
%108:vgpr_32 = V_LSHRREV_B32_e32 4, killed %23, implicit $exec
undef %109.sub1:vreg_128 = COPY %108
- %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sreg_128, 3044, 0 :: (dereferenceable invariant load 4)
+ %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sreg_128, 3044, 0, 0 :: (dereferenceable invariant load 4)
S_CMP_EQ_U32 killed %28, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
@@ -47,7 +47,7 @@ body: |
S_BRANCH %bb.6
bb.6:
- %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sreg_128, 2708, 0 :: (dereferenceable invariant load 4)
+ %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sreg_128, 2708, 0, 0 :: (dereferenceable invariant load 4)
%39:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec
%40:vgpr_32 = V_MAD_F32 0, %111.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec
%41:vgpr_32 = V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $exec
@@ -83,7 +83,7 @@ body: |
S_BRANCH %bb.8
bb.8:
- dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sreg_128, 2704, 0 :: (dereferenceable invariant load 4)
+ dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sreg_128, 2704, 0, 0 :: (dereferenceable invariant load 4)
%138:vreg_128 = COPY killed %111
bb.9:
Modified: llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir Tue Apr 30 15:08:23 2019
@@ -63,12 +63,12 @@ body: |
bb.3:
%1 = COPY killed %17
- FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%14 = COPY %1.sub1
%16 = COPY killed %1.sub0
undef %15.sub0 = COPY killed %16
%15.sub1 = COPY killed %14
- FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -134,10 +134,10 @@ body: |
%6.sub2 = COPY %6.sub0
bb.2:
- BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
S_SETPC_B64_return $sgpr30_sgpr31
Modified: llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir Tue Apr 30 15:08:23 2019
@@ -58,7 +58,7 @@ machineFunctionInfo:
body: |
bb.0:
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
S_ENDPGM 0
...
---
@@ -76,7 +76,7 @@ machineFunctionInfo:
body: |
bb.0:
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
S_DCACHE_WB
S_ENDPGM 0
...
@@ -97,7 +97,7 @@ machineFunctionInfo:
body: |
bb.0:
S_DCACHE_WB
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
S_ENDPGM 0
...
---
@@ -132,11 +132,11 @@ machineFunctionInfo:
body: |
bb.0:
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
S_ENDPGM 0
bb.1:
- S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0
+ S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0, 0
S_ENDPGM 0
...
...
@@ -164,7 +164,7 @@ body: |
S_ENDPGM 0
bb.1:
- S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0
+ S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0, 0
S_ENDPGM 0
...
---
@@ -182,6 +182,6 @@ machineFunctionInfo:
body: |
bb.0:
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
SI_RETURN_TO_EPILOG undef $vgpr0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir Tue Apr 30 15:08:23 2019
@@ -23,8 +23,8 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, implicit $exec
- ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, implicit $exec
+ ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, 0, implicit $exec
+ ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, 0, implicit $exec
; CHECK: undef %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
; CHECK: %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
; CHECK: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
@@ -40,19 +40,19 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $exec
; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $exec
- ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, implicit $exec
+ ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec
; CHECK: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: undef %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec
; CHECK: %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $exec
- ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec
- ; CHECK: %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, 0, implicit $exec
- ; CHECK: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, implicit $exec
- ; CHECK: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec
- ; CHECK: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, 0, implicit $exec
+ ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, implicit $exec
+ ; CHECK: %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, 0, 0, implicit $exec
+ ; CHECK: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, 0, implicit $exec
+ ; CHECK: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, 0, implicit $exec
+ ; CHECK: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, 0, 0, implicit $exec
; CHECK: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 2, [[DEF2]], implicit $exec
- ; CHECK: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, 0, implicit $exec
+ ; CHECK: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, 0, 0, implicit $exec
; CHECK: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]]
- ; CHECK: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, 0, implicit $exec
+ ; CHECK: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, 0, 0, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: S_SETREG_IMM32_B32 0, 1
@@ -69,14 +69,14 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%1:vreg_64 = IMPLICIT_DEF
- %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, implicit $exec
- %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, 0, implicit $exec
+ %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, 0, implicit $exec
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, 0, 0, implicit $exec
undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec
%4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
%5:vreg_64 = COPY %2
undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $exec
%6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $exec
- %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, 0, implicit $exec
%8:vreg_64 = IMPLICIT_DEF
%9:vreg_64 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
@@ -90,15 +90,15 @@ body: |
%18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $exec
%19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $exec
- GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec
- %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, implicit $exec
- %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, implicit $exec
- %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec
- %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, implicit $exec
- %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, implicit $exec
+ %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, 0, implicit $exec
+ %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, 0, implicit $exec
+ %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, 0, implicit $exec
+ %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, 0, implicit $exec
+ %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, 0, implicit $exec
%23:vreg_64 = V_LSHLREV_B64 2, %8, implicit $exec
S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17
- GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, 0, implicit $exec
bb.1:
S_SETREG_IMM32_B32 0, 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir Tue Apr 30 15:08:23 2019
@@ -200,12 +200,12 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0
- %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0
- %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0
+ %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0, 0
+ %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0, 0
+ %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0, 0
%11:sreg_32_xm0 = S_LSHR_B32 %10.sub0, 16, implicit-def dead $scc
%12:sreg_32_xm0 = S_MUL_I32 %11, %10.sub1
%13:vgpr_32 = V_MUL_LO_I32 0, %0, implicit $exec
@@ -217,29 +217,29 @@ body: |
%19:sreg_32_xm0_xexec = IMPLICIT_DEF
%20:vgpr_32 = V_ADD_I32_e32 %19, %0, implicit-def dead $vcc, implicit $exec
%21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32 %20, 12, %7, 0, implicit $exec
- %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, implicit $exec
+ %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, 0, implicit $exec
%24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32 %20, 48, %8, 0, implicit $exec
%26:vreg_128 = IMPLICIT_DEF
- undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0
+ undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0, 0
%27.sub1:sreg_64_xexec = S_MOV_B32 0
%28:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc
undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0, %28.sub0, implicit-def $scc
%29.sub1:sreg_64 = S_ADDC_U32 %5.sub1, %28.sub1, implicit-def dead $scc, implicit killed $scc
- undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0
+ undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0, 0
%27.sub0:sreg_64_xexec = IMPLICIT_DEF
%31:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc
%32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0, implicit-def $scc
%33:sgpr_32 = S_ADDC_U32 %5.sub1, %31.sub1, implicit-def dead $scc, implicit killed $scc
%34:vgpr_32 = IMPLICIT_DEF
%35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32 %23, %34, 0, 0, implicit $exec
- %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, 0, implicit $exec
+ %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, 0, 0, implicit $exec
undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0, implicit $exec
%38.sub0:vreg_64 = COPY %37.sub0
%39:vreg_64 = V_LSHLREV_B64 3, %38, implicit $exec
undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_I32_e64 0, %39.sub0, 0, implicit $exec
%42:vgpr_32 = COPY %33
%40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, 0, implicit $exec
- %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34)
+ %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34)
undef %45.sub1:vreg_64 = IMPLICIT_DEF
%45.sub0:vreg_64 = COPY %37.sub1
%46:vreg_64 = V_LSHLREV_B64 3, %45, implicit $exec
@@ -247,7 +247,7 @@ body: |
%49:vgpr_32 = COPY %33
%47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, 0, implicit $exec
%51:vreg_64 = IMPLICIT_DEF
- undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8)
+ undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8)
%52.sub1:vreg_64 = IMPLICIT_DEF
%53:vreg_64 = V_LSHLREV_B64 3, %52, implicit $exec
undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_I32_e64 0, %53.sub0, 0, implicit $exec
@@ -258,31 +258,31 @@ body: |
%59:sreg_64 = IMPLICIT_DEF
%60:sreg_32_xm0 = S_ADD_U32 %5.sub0, %59.sub0, implicit-def $scc
%61:sgpr_32 = S_ADDC_U32 %5.sub1, %59.sub1, implicit-def dead $scc, implicit killed $scc
- %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4)
+ %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4)
undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit $exec
%63.sub0:vreg_64 = COPY %62.sub0
%64:vreg_64 = IMPLICIT_DEF
undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_I32_e64 %60, %64.sub0, 0, implicit $exec
%67:vgpr_32 = COPY %61
%65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, 0, implicit $exec
- %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58)
+ %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58)
undef %70.sub1:vreg_64 = IMPLICIT_DEF
%70.sub0:vreg_64 = IMPLICIT_DEF
%71:vreg_64 = IMPLICIT_DEF
undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_I32_e64 %60, %71.sub0, 0, implicit $exec
%74:vgpr_32 = COPY %61
%72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, 0, implicit $exec
- %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, implicit $exec
+ %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, 0, implicit $exec
%77:vgpr_32 = IMPLICIT_DEF
%78:vgpr_32 = IMPLICIT_DEF
%79:vgpr_32 = V_MUL_F32_e32 0, %77, implicit $exec
%80:vgpr_32 = IMPLICIT_DEF
%81:vgpr_32 = IMPLICIT_DEF
%84:vgpr_32 = IMPLICIT_DEF
- BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, 0, implicit $exec
%85:vgpr_32 = IMPLICIT_DEF
%86:vgpr_32 = IMPLICIT_DEF
%87:vgpr_32 = IMPLICIT_DEF
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir Tue Apr 30 15:08:23 2019
@@ -47,7 +47,7 @@ body: |
liveins: $sgpr4_sgpr5
%1 = COPY $sgpr4_sgpr5
- %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORD_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$m0 = S_MOV_B32 -1
%7 = COPY %5
%6 = DS_READ_B32 %7, 0, 0, implicit $m0, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir Tue Apr 30 15:08:23 2019
@@ -37,11 +37,11 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%12 = S_MOV_B32 123
%10 = V_LSHRREV_B32_e64 16, %3, implicit $exec
%11 = V_ADD_I32_e32 %12, killed %10, implicit-def $vcc, implicit $exec
- FLAT_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -80,9 +80,9 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%10 = V_LSHRREV_B32_e64 16, %3, implicit $exec
%11 = V_TRUNC_F32_e64 0, killed %10, 1, 2, implicit-def $vcc, implicit $exec
- FLAT_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir Tue Apr 30 15:08:23 2019
@@ -29,19 +29,19 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
%161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec
%164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
%162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
%171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%173:vgpr_32, %175:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %171, 0, implicit $exec
%174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec
%172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -77,13 +77,13 @@ body: |
%64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
%161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec
%164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
%162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -113,7 +113,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -143,7 +143,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -172,7 +172,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -201,7 +201,7 @@ body: |
%30:vreg_64 = COPY $sgpr0_sgpr1
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -232,7 +232,7 @@ body: |
%30:vreg_64 = COPY $sgpr0_sgpr1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -263,7 +263,7 @@ body: |
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%31:vreg_64 = COPY $vcc
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -294,7 +294,7 @@ body: |
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%31:vreg_64 = COPY $vcc
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -325,7 +325,7 @@ body: |
%32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -356,7 +356,7 @@ body: |
%32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -386,5 +386,5 @@ body: |
%31:vreg_64 = COPY killed $vcc
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir Tue Apr 30 15:08:23 2019
@@ -89,7 +89,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = S_MOV_B32 65535
%6 = S_MOV_B32 65535
@@ -139,7 +139,7 @@ body: |
%100 = V_MOV_B32_e32 %48, implicit $exec
- FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -256,7 +256,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = S_MOV_B32 65535
%6 = S_MOV_B32 65535
@@ -315,7 +315,7 @@ body: |
%100 = V_MOV_B32_e32 %60, implicit $exec
- FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -401,7 +401,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = S_MOV_B32 65535
%6 = S_MOV_B32 65535
@@ -442,6 +442,6 @@ body: |
%100 = V_MOV_B32_e32 $vcc_lo, implicit $exec
- FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir Tue Apr 30 15:08:23 2019
@@ -36,8 +36,8 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
- %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = V_AND_B32_e32 65535, %3, implicit $exec
%6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
@@ -51,7 +51,7 @@ body: |
%13 = V_OR_B32_e64 %10, %12, implicit $exec
- FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -88,14 +88,14 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
- %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
%10:sreg_32_xm0 = S_MOV_B32 255
%11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
%17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
- FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
@@ -131,14 +131,14 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
- %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
%10:sreg_32_xm0 = S_MOV_B32 65535
%11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
%17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
- FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir Tue Apr 30 15:08:23 2019
@@ -203,7 +203,7 @@ body: |
liveins: $sgpr4_sgpr5
%4 = COPY $sgpr4_sgpr5
- %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %9 = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%8 = S_MOV_B64 0
%7 = COPY %9
%30 = V_MOV_B32_e32 1, implicit $exec
@@ -221,26 +221,26 @@ body: |
%15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc
%16 = REG_SEQUENCE %14, 1, %15, 2
%18 = COPY %16
- %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
+ %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
%60 = V_BFE_U32 %17, 8, 8, implicit $exec
%61 = V_LSHLREV_B32_e32 2, killed %60, implicit $exec
%70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec
%66 = COPY %13
%65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
%67 = REG_SEQUENCE %70, 1, killed %65, 2
- FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
+ FLAT_STORE_DWORD %67, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
%37 = S_ADD_U32 %14, 4, implicit-def $scc
%38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc
%71 = COPY killed %37
%72 = COPY killed %38
%41 = REG_SEQUENCE killed %71, 1, killed %72, 2
- %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
+ %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
%73 = V_BFE_U32 %40, 8, 8, implicit $exec
%74 = V_LSHLREV_B32_e32 2, killed %73, implicit $exec
%83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec
%78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
%80 = REG_SEQUENCE %83, 1, killed %78, 2
- FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
+ FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
%55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc
%56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc
%57 = REG_SEQUENCE %55, 1, killed %56, 2
@@ -365,7 +365,7 @@ body: |
liveins: $sgpr4_sgpr5
%4 = COPY $sgpr4_sgpr5
- %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %9 = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%8 = S_MOV_B64 0
%7 = COPY %9
%30 = V_MOV_B32_e32 1, implicit $exec
@@ -384,26 +384,26 @@ body: |
%15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc
%16 = REG_SEQUENCE %14, 1, %15, 2
%18 = COPY %16
- %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
+ %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
%60 = V_BFE_U32 %17, 8, 8, implicit $exec
%61 = V_LSHLREV_B32_e32 %84, killed %60, implicit $exec
%70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec
%66 = COPY %13
%65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
%67 = REG_SEQUENCE %70, 1, killed %65, 2
- FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
+ FLAT_STORE_DWORD %67, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
%37 = S_ADD_U32 %14, 4, implicit-def $scc
%38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc
%71 = COPY killed %37
%72 = COPY killed %38
%41 = REG_SEQUENCE killed %71, 1, killed %72, 2
- %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
+ %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
%73 = V_BFE_U32 %40, 8, 8, implicit $exec
%74 = V_LSHLREV_B32_e32 %84, killed %73, implicit $exec
%83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec
%78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
%80 = REG_SEQUENCE %83, 1, killed %78, 2
- FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
+ FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
%55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc
%56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc
%57 = REG_SEQUENCE %55, 1, killed %56, 2
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir Tue Apr 30 15:08:23 2019
@@ -41,7 +41,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%12 = V_LSHRREV_B32_e64 16, %3, implicit $exec
%13 = V_BCNT_U32_B32_e64 %3, killed %12, implicit-def $vcc, implicit $exec
@@ -56,6 +56,6 @@ body: |
%19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec
%20 = V_MOV_B32_e64 %19, implicit $exec
- FLAT_STORE_DWORD %0, %20, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir Tue Apr 30 15:08:23 2019
@@ -85,7 +85,7 @@ body: |
bb.0:
%0:sreg_32_xm0 = COPY $sgpr5
%1:vreg_64 = IMPLICIT_DEF
- %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir Tue Apr 30 15:08:23 2019
@@ -21,7 +21,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec
...
@@ -46,7 +46,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec
...
@@ -71,7 +71,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec
...
@@ -96,6 +96,6 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir Tue Apr 30 15:08:23 2019
@@ -71,8 +71,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -81,11 +81,11 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
%29, %9 = V_ADD_I32_e64 %19, %17, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -155,8 +155,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -165,11 +165,11 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
%29, %9 = V_SUB_I32_e64 %19, %17, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -239,8 +239,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -249,11 +249,11 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
%29, %9 = V_SUBREV_I32_e64 %19, %17, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -322,8 +322,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -332,12 +332,12 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
%9 = S_MOV_B64 0
%29, $vcc = V_ADDC_U32_e64 %19, %17, %9, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -407,8 +407,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -417,12 +417,12 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
$vcc = S_MOV_B64 0
%29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -492,8 +492,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -502,11 +502,11 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
%29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir Tue Apr 30 15:08:23 2019
@@ -15,7 +15,7 @@ body: |
; GCN: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
; GCN: S_ENDPGM 0
%0:sgpr_64 = COPY $sgpr4_sgpr5
- %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0
+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0
%2:sreg_32_xm0 = S_AND_B32 %1, 255, implicit-def $scc
%3:sreg_32_xm0 = S_AND_B32 65535, %2, implicit-def $scc
S_ENDPGM 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir Tue Apr 30 15:08:23 2019
@@ -36,8 +36,8 @@ body: |
%3.sub1:sgpr_128 = S_AND_B32 %2, 65535, implicit-def dead $scc
%3.sub3:sgpr_128 = S_MOV_B32 151468
%3.sub2:sgpr_128 = S_MOV_B32 -1
- %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4)
- %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0 :: (dereferenceable invariant load 8)
+ %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4)
+ %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0, 0 :: (dereferenceable invariant load 8)
undef %9.sub0:vreg_128 = V_LSHL_ADD_U32 %6, 4, %4, implicit $exec
%9.sub1:vreg_128 = V_LSHL_ADD_U32 %5, 4, %0, implicit $exec
S_ENDPGM 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir Tue Apr 30 15:08:23 2019
@@ -19,7 +19,7 @@ body: |
%8:vgpr_32 = COPY %6
%7:vgpr_32 = V_ADD_I32_e32 %4, killed %8, implicit-def dead $vcc, implicit $exec
%10:sreg_32 = COPY %7
- %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0
+ %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0
$vgpr0 = COPY %9
SI_RETURN_TO_EPILOG $vgpr0
...
@@ -43,7 +43,7 @@ body: |
%8:vgpr_32 = COPY %6
%7:vgpr_32 = V_ADD_U32_e32 %4, killed %8, implicit $exec
%10:sreg_32 = COPY %7
- %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0 :: (dereferenceable invariant load 4)
+ %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0 :: (dereferenceable invariant load 4)
$vgpr0 = COPY %9
SI_RETURN_TO_EPILOG $vgpr0
Modified: llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir Tue Apr 30 15:08:23 2019
@@ -24,10 +24,10 @@ machineFunctionInfo:
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
- %0:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, implicit $flat_scr, implicit $exec
- %2:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, implicit $flat_scr, implicit $exec
+ %0:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $flat_scr, implicit $exec
+ %2:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $flat_scr, implicit $exec
S_NOP 0, implicit %0
- %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
- %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
+ %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
S_NOP 0, implicit %1
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir Tue Apr 30 15:08:23 2019
@@ -165,12 +165,12 @@ body: |
%18:vgpr_32 = V_MAD_F32 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $exec
%19:vgpr_32 = V_MAD_F32 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec
- %20:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sreg_128, 1040, 0 :: (dereferenceable invariant load 16)
+ %20:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sreg_128, 1040, 0, 0 :: (dereferenceable invariant load 16)
%22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $exec
%23:vgpr_32 = V_MAD_F32 0, %18, 0, 0, 0, 0, 0, 0, implicit $exec
%24:vgpr_32 = COPY %20.sub3
%25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $exec
- %26:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sreg_128, 1056, 0 :: (dereferenceable invariant load 16)
+ %26:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sreg_128, 1056, 0, 0 :: (dereferenceable invariant load 16)
%28:vgpr_32 = V_MAD_F32 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $exec
%29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $exec
%30:vgpr_32 = V_RCP_F32_e32 %29, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll Tue Apr 30 15:08:23 2019
@@ -1,9 +1,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-after=si-insert-skips < %s | FileCheck --check-prefix=GCN %s
; GCN-LABEL: name: syncscopes
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
define void @syncscopes(
i32 %agent,
i32* %agent_out,
Modified: llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir Tue Apr 30 15:08:23 2019
@@ -44,7 +44,7 @@ body: |
liveins: $vgpr0, $sgpr4_sgpr5
$vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
- $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
+ $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
$vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
$vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec
@@ -108,7 +108,7 @@ body: |
liveins: $vgpr0, $sgpr4_sgpr5
$vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
- $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
+ $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
$vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
$vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir Tue Apr 30 15:08:23 2019
@@ -78,7 +78,7 @@ body: |
bb.0.entry:
liveins: $sgpr0_sgpr1, $vcc
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
$vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec
@@ -88,7 +88,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
@@ -96,7 +96,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
bb.3.done:
@@ -104,7 +104,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
S_ENDPGM 0
...
@@ -140,7 +140,7 @@ body: |
bb.0.entry:
liveins: $sgpr0_sgpr1
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
@@ -149,7 +149,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
@@ -157,7 +157,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
bb.3.done:
@@ -165,7 +165,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir Tue Apr 30 15:08:23 2019
@@ -19,7 +19,7 @@ body: |
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_branch_to_next
# GCN: bb.1:
@@ -40,7 +40,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_too_far
# GCN: bb.1:
@@ -61,7 +61,7 @@ body: |
$sgpr0 = S_MOV_B32 0
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_nops
# GCN: bb.1:
@@ -78,7 +78,7 @@ body: |
S_NOP 4
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_branch_around
# GCN: bb.2:
@@ -107,7 +107,7 @@ body: |
S_NOP 0
bb.2:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_branch_backedge
# GCN: S_NOP
@@ -123,7 +123,7 @@ body: |
$vgpr0 = IMPLICIT_DEF
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
bb.1:
$vgpr0 = IMPLICIT_DEF
@@ -156,7 +156,7 @@ body: |
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
bb.2:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_self_loop
# GCN: S_NOP
@@ -172,7 +172,7 @@ body: |
$vgpr0 = IMPLICIT_DEF
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
S_BRANCH %bb.0
...
@@ -198,7 +198,7 @@ body: |
successors: %bb.1
$sgpr0 = S_MOV_B32 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
S_BRANCH %bb.1
...
@@ -224,7 +224,7 @@ body: |
successors: %bb.1
$sgpr0 = S_MOV_B32 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
S_BRANCH %bb.1
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir Tue Apr 30 15:08:23 2019
@@ -13,8 +13,8 @@ body: |
$vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
$vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
- $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec
$vgpr3 = V_CNDMASK_B32_e64 0, -1082130432, 0, 1065353216, killed $sgpr0_sgpr1, implicit $exec
$vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
@@ -23,7 +23,7 @@ body: |
bb.3:
successors: %bb.1
- $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
bb.1:
successors: %bb.5, %bb.2
@@ -43,7 +43,7 @@ body: |
bb.4:
successors: %bb.3, %bb.1
- $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $exec
V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir Tue Apr 30 15:08:23 2019
@@ -37,8 +37,8 @@ body: |
bb.2:
successors: %bb.3
- renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0
- renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0
+ renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0
+ renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0
bb.3:
successors: %bb.1, %bb.4
@@ -73,12 +73,12 @@ name: irreducible_loop_extended
body: |
bb.0:
successors: %bb.1, %bb.2
- $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0
+ $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0, 0
S_CBRANCH_VCCZ %bb.2, implicit $vcc
bb.1:
successors: %bb.2
- BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
bb.2:
successors: %bb.3, %bb.6
@@ -86,18 +86,18 @@ body: |
bb.3:
successors: %bb.4, %bb.5
- BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
S_CBRANCH_VCCNZ %bb.5, implicit $vcc
bb.4:
successors: %bb.5
- renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0
+ renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0, 0
renamable $vgpr2 = BUFFER_ATOMIC_ADD_OFFSET_RTN killed renamable $vgpr2, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, implicit $exec
bb.5:
successors: %bb.6
bb.6:
- FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir Tue Apr 30 15:08:23 2019
@@ -15,11 +15,11 @@ body: |
bb.0:
S_BRANCH %bb.1
bb.1:
- GLOBAL_STORE_DWORD $vgpr7_vgpr8, $vgpr11, 0, 0, 0, implicit $exec
- $vgpr21 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec
- $vgpr10 = GLOBAL_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD $vgpr14_vgpr15, $vgpr21, 0, 0, 0, implicit $exec
- $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD $vgpr7_vgpr8, $vgpr11, 0, 0, 0, 0, implicit $exec
+ $vgpr21 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec
+ $vgpr10 = GLOBAL_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD $vgpr14_vgpr15, $vgpr21, 0, 0, 0, 0, implicit $exec
+ $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, 0, 0, implicit $exec
S_CBRANCH_SCC1 %bb.1, implicit $scc
bb.2:
S_ENDPGM 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir Tue Apr 30 15:08:23 2019
@@ -20,7 +20,7 @@ body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0
- renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 480, 0
+ renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 480, 0, 0
renamable $vgpr13 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
S_WAITCNT -16257
renamable $vgpr0_vgpr1 = DS_READ2_B32 renamable $vgpr13, 0, 1, 0, implicit $m0, implicit $exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir Tue Apr 30 15:08:23 2019
@@ -44,21 +44,21 @@ name: flat_zero_waitcnt
body: |
bb.0:
successors: %bb.1
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
- $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
+ $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
S_BRANCH %bb.2
bb.2:
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
- $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
+ $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
S_ENDPGM 0
...
@@ -79,11 +79,11 @@ name: single_fallthrough_successor_no_en
body: |
bb.0:
successors: %bb.1
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
bb.1:
$vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
- FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -106,15 +106,15 @@ name: single_branch_successor_not_next_b
body: |
bb.0:
successors: %bb.2
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_BRANCH %bb.2
bb.1:
- FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
bb.2:
$vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
- FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir Tue Apr 30 15:08:23 2019
@@ -16,13 +16,13 @@ body: |
%0:sgpr_64 = COPY $sgpr0_sgpr1
%vreg123_1:vgpr_32 = COPY %11
%27:vreg_64 = REG_SEQUENCE %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
%vreg123_2:vgpr_32 = COPY %4
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%vreg123_3:vgpr_32 = COPY %5
%16:sgpr_128 = REG_SEQUENCE killed %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1, %vreg123_2, %subreg.sub2, %vreg123_3, %subreg.sub3
- BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir Tue Apr 30 15:08:23 2019
@@ -11,7 +11,7 @@
# CHECK: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
# CHECK: scratchWaveOffsetReg: '$sgpr50'
# CHECK: frameOffsetReg: '$sgpr50'
-# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
name: reserve_correct_register
tracksRegLiveness: true
machineFunctionInfo:
@@ -24,6 +24,6 @@ stack:
body: |
bb.0:
- renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir Tue Apr 30 15:08:23 2019
@@ -42,9 +42,9 @@
!0 = !{i32 1}
# GCN-LABEL: name: syncscopes
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4)
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4)
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4)
...
---
name: syncscopes
@@ -74,27 +74,27 @@ body: |
liveins: $sgpr4_sgpr5
S_WAITCNT 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
- $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
- $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 127
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
- FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out)
+ FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out)
S_WAITCNT 112
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $sgpr2_sgpr3, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
- FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
+ FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
S_WAITCNT 112
$vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $sgpr4_sgpr5, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec
- FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
+ FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir Tue Apr 30 15:08:23 2019
@@ -40,19 +40,19 @@ body: |
$sgpr2 = S_ADD_U32 $sgpr2, target-index(amdgpu-constdata-start), implicit-def $scc, implicit-def $scc
$sgpr3 = S_ADDC_U32 $sgpr3, 0, implicit-def $scc, implicit $scc, implicit-def $scc, implicit $scc
$sgpr4_sgpr5 = S_LSHR_B64 $sgpr2_sgpr3, 32, implicit-def dead $scc
- $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0
+ $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0, 0
$sgpr7 = S_ASHR_I32 $sgpr6, 31, implicit-def dead $scc
$sgpr6_sgpr7 = S_LSHL_B64 $sgpr6_sgpr7, 2, implicit-def dead $scc
$sgpr2 = S_ADD_U32 $sgpr2, @float_gv, implicit-def $scc
$sgpr3 = S_ADDC_U32 $sgpr4, 0, implicit-def dead $scc, implicit $scc
$sgpr4 = S_ADD_U32 $sgpr2, $sgpr6, implicit-def $scc
$sgpr5 = S_ADDC_U32 $sgpr3, $sgpr7, implicit-def dead $scc, implicit $scc
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0, 0
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -70,18 +70,18 @@ body: |
$sgpr2 = S_ADD_U32 $sgpr2, target-index(amdgpu-constdata-start) + 1, implicit-def $scc, implicit-def $scc
$sgpr3 = S_ADDC_U32 $sgpr3, 0, implicit-def $scc, implicit $scc, implicit-def $scc, implicit $scc
$sgpr4_sgpr5 = S_LSHR_B64 $sgpr2_sgpr3, 32, implicit-def dead $scc
- $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0
+ $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0, 0
$sgpr7 = S_ASHR_I32 $sgpr6, 31, implicit-def dead $scc
$sgpr6_sgpr7 = S_LSHL_B64 $sgpr6_sgpr7, 2, implicit-def dead $scc
$sgpr2 = S_ADD_U32 $sgpr2, @float_gv, implicit-def $scc
$sgpr3 = S_ADDC_U32 $sgpr4, 0, implicit-def dead $scc, implicit $scc
$sgpr4 = S_ADD_U32 $sgpr2, $sgpr6, implicit-def $scc
$sgpr5 = S_ADDC_U32 $sgpr3, $sgpr7, implicit-def dead $scc, implicit $scc
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0, 0
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Added: llvm/trunk/test/MC/AMDGPU/flat-gfx10.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/flat-gfx10.s?rev=359621&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/flat-gfx10.s (added)
+++ llvm/trunk/test/MC/AMDGPU/flat-gfx10.s Tue Apr 30 15:08:23 2019
@@ -0,0 +1,119 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s
+
+flat_load_dword v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_dword v1, v[3:4] offset:-1
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:2047
+// GFX10: encoding: [0xff,0x07,0x30,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_dword v1, v[3:4] offset:2048
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:4 glc
+// GFX10: encoding: [0x04,0x00,0x31,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_dword v1, v[3:4] offset:4 glc slc
+// GFX10: encoding: [0x04,0x00,0x33,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_dword v1, v[3:4] offset:4 glc slc dlc
+// GFX10: encoding: [0x04,0x10,0x33,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_atomic_add v[3:4], v5 offset:8 slc
+// GFX10: encoding: [0x08,0x00,0xca,0xdc,0x03,0x05,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4] offset:2047
+// GFX10: encoding: [0xff,0x07,0xc4,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 slc
+// GFX10: encoding: [0xff,0x07,0xc6,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4]
+// GFX10: encoding: [0x00,0x00,0xc4,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4] slc
+// GFX10: encoding: [0x00,0x00,0xc6,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 glc
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_atomic_cmpswap v[1:2], v[3:4] glc
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc
+// GFX10: encoding: [0xff,0x07,0xc5,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc slc
+// GFX10: encoding: [0xff,0x07,0xc7,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] glc
+// GFX10: encoding: [0x00,0x00,0xc5,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] glc slc
+// GFX10: encoding: [0x00,0x00,0xc7,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] glc
+// GFX10: encoding: [0x00,0x00,0xc5,0xdc,0x01,0x03,0x7d,0x00]
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047
+// GFX10-ERR: error: too few operands for instruction
+
+flat_atomic_cmpswap v0, v[1:2], v[3:4] slc
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_atomic_swap v[3:4], v5 offset:16
+// GFX10: encoding: [0x10,0x00,0xc0,0xdc,0x03,0x05,0x7d,0x00]
+
+flat_store_dword v[3:4], v1 offset:16
+// GFX10: encoding: [0x10,0x00,0x70,0xdc,0x03,0x01,0x7d,0x00]
+
+flat_store_dword v[3:4], v1, off
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_store_dword v[3:4], v1, s[0:1]
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_store_dword v[3:4], v1, s0
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4], off
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4], s[0:1]
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4], s0
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4], exec_hi
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_store_dword v[3:4], v1, exec_hi
+// GFX10-ERR: error: invalid operand for instruction
+
+flat_load_ubyte_d16 v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x80,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_ubyte_d16_hi v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x84,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_sbyte_d16 v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x88,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_sbyte_d16_hi v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x8c,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_short_d16 v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x90,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_load_short_d16_hi v1, v[3:4]
+// GFX10: encoding: [0x00,0x00,0x94,0xdc,0x03,0x00,0x7d,0x01]
+
+flat_store_byte_d16_hi v[3:4], v1
+// GFX10: encoding: [0x00,0x00,0x64,0xdc,0x03,0x01,0x7d,0x00]
+
+flat_store_short_d16_hi v[3:4], v1
+// GFX10: encoding: [0x00,0x00,0x6c,0xdc,0x03,0x01,0x7d,0x00]
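The CHECK lines above expose the layout of the first DWORD of the GFX10 FLAT-format encoding: the immediate offset sits in the low bits, and the dlc, glc and slc modifiers each toggle a single bit. The Python sketch below decodes that DWORD using field positions inferred from these encodings; it is only an aid for reading the checks, not code from this patch, and the helper name is made up.

# Decode the first DWORD of a GFX10 FLAT-format encoding.
# Field positions are inferred from the CHECK lines above (assumption).
def decode_flat_dword0(byte_list):
    word = int.from_bytes(bytes(byte_list[:4]), "little")
    return {
        "offset": word & 0xFFF,         # immediate offset field
        "dlc":    (word >> 12) & 1,     # device-level coherent
        "seg":    (word >> 14) & 3,     # 0 = flat, 1 = scratch, 2 = global
        "glc":    (word >> 16) & 1,     # globally coherent
        "slc":    (word >> 17) & 1,     # system-level coherent
        "op":     (word >> 18) & 0x7F,  # FLAT opcode
    }

# flat_load_dword v1, v[3:4] offset:4 glc slc dlc
print(decode_flat_dword0([0x04, 0x10, 0x33, 0xDC]))
# {'offset': 4, 'dlc': 1, 'seg': 0, 'glc': 1, 'slc': 1, 'op': 12}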
Modified: llvm/trunk/test/MC/AMDGPU/flat-global.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/flat-global.s?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/flat-global.s (original)
+++ llvm/trunk/test/MC/AMDGPU/flat-global.s Tue Apr 30 15:08:23 2019
@@ -2,364 +2,527 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR %s
+
global_load_ubyte v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x20,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_ubyte v1, v[3:4], off ; encoding: [0x00,0x80,0x40,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+global_load_ubyte v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x20,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_load_sbyte v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x24,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_sbyte v1, v[3:4], off ; encoding: [0x00,0x80,0x44,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+global_load_sbyte v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x24,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_load_ushort v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x28,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_ushort v1, v[3:4], off ; encoding: [0x00,0x80,0x48,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+global_load_ushort v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x28,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_load_sshort v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x2c,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_sshort v1, v[3:4], off ; encoding: [0x00,0x80,0x4c,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+global_load_sshort v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x2c,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_load_dword v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+global_load_dword v1, v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x30,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_load_dwordx2 v[1:2], v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x34,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_dwordx2 v[1:2], v[3:4], off ; encoding: [0x00,0x80,0x54,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+global_load_dwordx2 v[1:2], v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x34,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_load_dwordx3 v[1:3], v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x3c,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_dwordx3 v[1:3], v[3:4], off ; encoding: [0x00,0x80,0x58,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+global_load_dwordx3 v[1:3], v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x3c,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_load_dwordx4 v[1:4], v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x38,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_dwordx4 v[1:4], v[3:4], off ; encoding: [0x00,0x80,0x5c,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+global_load_dwordx4 v[1:4], v[3:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x38,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
// FIXME: VI error should be instruction not supported
global_load_dword v1, v[3:4], off offset:0
+// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: :41: error: not a valid operand.
global_load_dword v1, v[3:4], off offset:4095
+// GFX10-ERR: error: invalid operand for instruction
// GFX9: global_load_dword v1, v[3:4], off offset:4095 ; encoding: [0xff,0x8f,0x50,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: :41: error: not a valid operand.
global_load_dword v1, v[3:4], off offset:-1
+// GFX10: encoding: [0xff,0x8f,0x30,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_dword v1, v[3:4], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: :41: error: not a valid operand.
global_load_dword v1, v[3:4], off offset:-4096
+// GFX10-ERR: error: invalid operand for instruction
// GFX9: global_load_dword v1, v[3:4], off offset:-4096 ; encoding: [0x00,0x90,0x50,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: :41: error: not a valid operand.
global_load_dword v1, v[3:4], off offset:4096
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: :35: error: invalid operand for instruction
// VI-ERR: :41: error: not a valid operand.
global_load_dword v1, v[3:4] off, offset:-4097
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: :35: error: invalid operand for instruction
// VI-ERR: :41: error: not a valid operand.
global_store_byte v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_byte v[3:4], v1, off ; encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+global_store_byte v[3:4], v1, off dlc
+// GFX10: encoding: [0x00,0x90,0x60,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_store_short v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_short v[3:4], v1, off ; encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+global_store_short v[3:4], v1, off dlc
+// GFX10: encoding: [0x00,0x90,0x68,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_store_dword v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_dword v[3:4], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+global_store_dword v[3:4], v1, off dlc
+// GFX10: encoding: [0x00,0x90,0x70,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_store_dwordx2 v[3:4], v[1:2], off
+// GFX10: encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_dwordx2 v[3:4], v[1:2], off ; encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+global_store_dwordx2 v[3:4], v[1:2], off dlc
+// GFX10: encoding: [0x00,0x90,0x74,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_store_dwordx3 v[3:4], v[1:3], off
+// GFX10: encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_dwordx3 v[3:4], v[1:3], off ; encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+global_store_dwordx3 v[3:4], v[1:3], off dlc
+// GFX10: encoding: [0x00,0x90,0x7c,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_store_dwordx4 v[3:4], v[1:4], off
+// GFX10: encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_dwordx4 v[3:4], v[1:4], off ; encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+global_store_dwordx4 v[3:4], v[1:4], off dlc
+// GFX10: encoding: [0x00,0x90,0x78,0xdc,0x03,0x01,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
global_store_dword v[3:4], v1, off offset:12
+// GFX10: encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_dword v[3:4], v1, off offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: :42: error: not a valid operand
global_load_dword v1, v[3:4], s[2:3]
+// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
// GFX9: global_load_dword v1, v[3:4], s[2:3] ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: instruction not supported on this GPU
global_load_dword v1, v[3:4], s[2:3] offset:24
+// GFX10: encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
// GFX9: global_load_dword v1, v[3:4], s[2:3] offset:24 ; encoding: [0x18,0x80,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: :44: error: not a valid operand.
global_load_dword v1, v[3:4], s[2:3] offset:-8
+// GFX10: encoding: [0xf8,0x8f,0x30,0xdc,0x03,0x00,0x02,0x01]
// GFX9: global_load_dword v1, v[3:4], s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: :44: error: not a valid operand.
global_store_dword v[3:4], v1, s[2:3]
+// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// GFX9: global_store_dword v[3:4], v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: instruction not supported on this GPU
global_store_dword v[3:4], v1, s[2:3] offset:24
+// GFX10: encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// GFX9: global_store_dword v[3:4], v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: :45: error: not a valid operand.
global_store_dword v[3:4], v1, s[2:3] offset:-8
+// GFX10: encoding: [0xf8,0x8f,0x70,0xdc,0x03,0x01,0x02,0x00]
// GFX9: global_store_dword v[3:4], v1, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: :45: error: not a valid operand.
// XXX: Is this valid?
global_store_dword v[3:4], v1, exec
+// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
// GFX9: global_store_dword v[3:4], v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
// VI-ERR: instruction not supported on this GPU
global_load_dword v1, v[3:4], s2
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: :31: error: invalid operand for instruction
// VI-ERR: :31: error: invalid operand for instruction
global_load_dword v1, v[3:4], exec_hi
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: :31: error: invalid operand for instruction
// VI-ERR: :31: error: invalid operand for instruction
global_atomic_cmpswap v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0xc4,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_cmpswap v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x04,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: error: instruction not supported on this GPU
global_atomic_cmpswap_x2 v[3:4], v[5:8], off
+// GFX10: encoding: [0x00,0x80,0x44,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8], off ; encoding: [0x00,0x80,0x84,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: error: instruction not supported on this GPU
global_atomic_swap v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xc0,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_swap v[3:4], v5, off ; encoding: [0x00,0x80,0x00,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: error: instruction not supported on this GPU
global_atomic_swap_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x40,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_swap_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x80,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: error: instruction not supported on this GPU
global_atomic_add v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xc8,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_add v[3:4], v5, off ; encoding: [0x00,0x80,0x08,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_sub v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xcc,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_sub v[3:4], v5, off ; encoding: [0x00,0x80,0x0c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_smin v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xd4,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_smin v[3:4], v5, off ; encoding: [0x00,0x80,0x10,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_umin v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xd8,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_umin v[3:4], v5, off ; encoding: [0x00,0x80,0x14,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_smax v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xdc,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_smax v[3:4], v5, off ; encoding: [0x00,0x80,0x18,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_umax v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xe0,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_umax v[3:4], v5, off ; encoding: [0x00,0x80,0x1c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_and v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xe4,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_and v[3:4], v5, off ; encoding: [0x00,0x80,0x20,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_or v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xe8,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_or v[3:4], v5, off ; encoding: [0x00,0x80,0x24,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_xor v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xec,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_xor v[3:4], v5, off ; encoding: [0x00,0x80,0x28,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_inc v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xf0,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_inc v[3:4], v5, off ; encoding: [0x00,0x80,0x2c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_dec v[3:4], v5, off
+// GFX10: encoding: [0x00,0x80,0xf4,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_dec v[3:4], v5, off ; encoding: [0x00,0x80,0x30,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_add_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x48,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_add_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x88,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_sub_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x4c,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x8c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_smin_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x54,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x90,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_umin_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x58,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x94,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_smax_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x5c,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x98,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_umax_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x60,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x9c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_and_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x64,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_and_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa0,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_or_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x68,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_or_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa4,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_xor_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x6c,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa8,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_inc_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x70,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xac,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_atomic_dec_x2 v[3:4], v[5:6], off
+// GFX10: encoding: [0x00,0x80,0x74,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_dec_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xb0,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: error: instruction not supported on this GPU
global_atomic_cmpswap v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xc4,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_cmpswap v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x04,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :49: error: not a valid operand.
global_atomic_cmpswap_x2 v[3:4], v[5:8], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x44,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8], off offset:-16 ; encoding: [0xf0,0x9f,0x84,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :52: error: not a valid operand.
global_atomic_swap v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xc0,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_swap v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x00,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :42: error: not a valid operand
global_atomic_swap_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x40,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_swap_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x80,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :49: error: not a valid operand
global_atomic_add v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xc8,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_add v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x08,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :41: error: not a valid operand
global_atomic_sub v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xcc,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_sub v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x0c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :41: error: not a valid operand
global_atomic_smin v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xd4,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_smin v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x10,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :42: error: not a valid operand
global_atomic_umin v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xd8,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_umin v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x14,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :42: error: not a valid operand
global_atomic_smax v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xdc,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_smax v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x18,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :42: error: not a valid operand
global_atomic_umax v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xe0,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_umax v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x1c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :42: error: not a valid operand
global_atomic_and v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xe4,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_and v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x20,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :41: error: not a valid operand
global_atomic_or v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xe8,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_or v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x24,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :40: error: not a valid operand
global_atomic_xor v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xec,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_xor v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x28,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :41: error: not a valid operand
global_atomic_inc v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xf0,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_inc v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x2c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :41: error: not a valid operand
global_atomic_dec v[3:4], v5, off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0xf4,0xdc,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_dec v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x30,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :41: error: not a valid operand
global_atomic_add_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x48,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_add_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x88,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :48: error: not a valid operand
global_atomic_sub_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x4c,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x8c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :48: error: not a valid operand
global_atomic_smin_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x54,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x90,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :49: error: not a valid operand
global_atomic_umin_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x58,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x94,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :49: error: not a valid operand
global_atomic_smax_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x5c,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x98,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :49: error: not a valid operand
global_atomic_umax_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x60,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x9c,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :49: error: not a valid operand
global_atomic_and_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x64,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_and_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa0,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :48: error: not a valid operand
global_atomic_or_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x68,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_or_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa4,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :47: error: not a valid operand
global_atomic_xor_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x6c,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa8,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :48: error: not a valid operand
global_atomic_inc_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x70,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xac,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :48: error: not a valid operand
global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16
+// GFX10: encoding: [0xf0,0x8f,0x74,0xdd,0x03,0x05,0x7d,0x00]
// GFX9: global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xb0,0xdd,0x03,0x05,0x7f,0x00]
// VI-ERR: :48: error: not a valid operand
global_load_ubyte_d16 v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_ubyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
global_load_ubyte_d16_hi v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_ubyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
global_load_sbyte_d16 v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_sbyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
global_load_sbyte_d16_hi v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_sbyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
global_load_short_d16 v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_short_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
global_load_short_d16_hi v1, v[3:4], off
+// GFX10: encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: global_load_short_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
global_store_byte_d16_hi v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_byte_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
global_store_short_d16_hi v[3:4], v1, off
+// GFX10: encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_short_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
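A pattern worth noting in the new GFX10 checks above: offsets that assemble on GFX9 (offset:4095, offset:-4096) are rejected on GFX10, and offset:-1 now encodes its offset field as 0xfff rather than 0x1fff. That is consistent with the global/scratch offset shrinking from a signed 13-bit field on GFX9 to a signed 12-bit field on GFX10, and with GFX10 FLAT offsets being limited to non-negative 11-bit values (offset:2047 accepted, offset:2048 and offset:-1 rejected in flat-gfx10.s). The following is a minimal range-check sketch under those assumptions; the helper is hypothetical and only mirrors what the checks imply.

# Hypothetical validity check mirroring the offset ranges implied by the
# GFX9/GFX10 checks above (not code from this patch).
def is_valid_flat_offset(offset, gfx10, flat_segment):
    if gfx10:
        if flat_segment:
            return 0 <= offset < (1 << 11)        # FLAT: 0 .. 2047
        return -(1 << 11) <= offset < (1 << 11)   # GLOBAL/SCRATCH: -2048 .. 2047
    return -(1 << 12) <= offset < (1 << 12)       # GFX9: -4096 .. 4095

assert is_valid_flat_offset(4095, gfx10=False, flat_segment=False)
assert not is_valid_flat_offset(4095, gfx10=True, flat_segment=False)
assert is_valid_flat_offset(2047, gfx10=True, flat_segment=True)
assert not is_valid_flat_offset(-1, gfx10=True, flat_segment=True)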
Modified: llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s?rev=359621&r1=359620&r2=359621&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s (original)
+++ llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s Tue Apr 30 15:08:23 2019
@@ -2,176 +2,291 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s
+
scratch_load_ubyte v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x20,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_ubyte v1, v2, off ; encoding: [0x00,0x40,0x40,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+scratch_load_ubyte v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x20,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_load_sbyte v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x24,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_sbyte v1, v2, off ; encoding: [0x00,0x40,0x44,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+scratch_load_sbyte v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x24,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_load_ushort v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x28,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_ushort v1, v2, off ; encoding: [0x00,0x40,0x48,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+scratch_load_ushort v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x28,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_load_sshort v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x2c,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_sshort v1, v2, off ; encoding: [0x00,0x40,0x4c,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+scratch_load_sshort v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x2c,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_load_dword v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+scratch_load_dword v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x30,0xdc,0x02,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_load_dwordx2 v[1:2], v3, off
+// GFX10: encoding: [0x00,0x40,0x34,0xdc,0x03,0x00,0x7d,0x01]
// GFX9: scratch_load_dwordx2 v[1:2], v3, off ; encoding: [0x00,0x40,0x54,0xdc,0x03,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+scratch_load_dwordx2 v[1:2], v3, off dlc
+// GFX10: encoding: [0x00,0x50,0x34,0xdc,0x03,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_load_dwordx3 v[1:3], v4, off
+// GFX10: encoding: [0x00,0x40,0x3c,0xdc,0x04,0x00,0x7d,0x01]
// GFX9: scratch_load_dwordx3 v[1:3], v4, off ; encoding: [0x00,0x40,0x58,0xdc,0x04,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
+scratch_load_dwordx3 v[1:3], v4, off dlc
+// GFX10: encoding: [0x00,0x50,0x3c,0xdc,0x04,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_load_dwordx4 v[1:4], v5, off
+// GFX10: encoding: [0x00,0x40,0x38,0xdc,0x05,0x00,0x7d,0x01]
// GFX9: scratch_load_dwordx4 v[1:4], v5, off ; encoding: [0x00,0x40,0x5c,0xdc,0x05,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
-// FIXME: VI error should be instruction nto supported
+
+scratch_load_dwordx4 v[1:4], v5, off dlc
+// GFX10: encoding: [0x00,0x50,0x38,0xdc,0x05,0x00,0x7d,0x01]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
scratch_load_dword v1, v2, off offset:0
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: error: not a valid operand.
scratch_load_dword v1, v2, off offset:4095
+// GFX10-ERR: error: invalid operand for instruction
// GFX9: scratch_load_dword v1, v2, off offset:4095 ; encoding: [0xff,0x4f,0x50,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: error: not a valid operand.
scratch_load_dword v1, v2, off offset:-1
+// GFX10: encoding: [0xff,0x4f,0x30,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_dword v1, v2, off offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: error: not a valid operand.
scratch_load_dword v1, v2, off offset:-4096
+// GFX10-ERR: error: invalid operand for instruction
// GFX9: scratch_load_dword v1, v2, off offset:-4096 ; encoding: [0x00,0x50,0x50,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: error: not a valid operand.
scratch_load_dword v1, v2, off offset:4096
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: error: invalid operand for instruction
// VI-ERR: error: not a valid operand.
scratch_load_dword v1, v2, off offset:-4097
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: error: invalid operand for instruction
// VI-ERR: error: not a valid operand.
scratch_store_byte v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7d,0x00]
// GFX9: scratch_store_byte v1, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+scratch_store_byte v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x60,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_store_short v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7d,0x00]
// GFX9: scratch_store_short v1, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+scratch_store_short v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x68,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_store_dword v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00]
// GFX9: scratch_store_dword v1, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+scratch_store_dword v1, v2, off dlc
+// GFX10: encoding: [0x00,0x50,0x70,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_store_dwordx2 v1, v[2:3], off
+// GFX10: encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7d,0x00]
// GFX9: scratch_store_dwordx2 v1, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+scratch_store_dwordx2 v1, v[2:3], off dlc
+// GFX10: encoding: [0x00,0x50,0x74,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_store_dwordx3 v1, v[2:4], off
+// GFX10: encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7d,0x00]
// GFX9: scratch_store_dwordx3 v1, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+scratch_store_dwordx3 v1, v[2:4], off dlc
+// GFX10: encoding: [0x00,0x50,0x7c,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_store_dwordx4 v1, v[2:5], off
+// GFX10: encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7d,0x00]
// GFX9: scratch_store_dwordx4 v1, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7f,0x00]
// VI-ERR: instruction not supported on this GPU
+scratch_store_dwordx4 v1, v[2:5], off dlc
+// GFX10: encoding: [0x00,0x50,0x78,0xdc,0x01,0x02,0x7d,0x00]
+// GFX9-ERR: error: failed parsing operand
+// VI-ERR: error: invalid operand for instruction
+
scratch_store_dword v1, v2, off offset:12
+// GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00]
// GFX9: scratch_store_dword v1, v2, off offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00]
// VI-ERR: error: not a valid operand
scratch_load_dword v1, off, s1
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x01,0x01]
// GFX9: scratch_load_dword v1, off, s1 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x01,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_load_dword v1, off, s1 offset:32
+// GFX10: encoding: [0x20,0x40,0x30,0xdc,0x00,0x00,0x01,0x01]
// GFX9: scratch_load_dword v1, off, s1 offset:32 ; encoding: [0x20,0x40,0x50,0xdc,0x00,0x00,0x01,0x01]
// VI-ERR: error: not a valid operand
scratch_store_dword off, v2, s1
+// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00]
// GFX9: scratch_store_dword off, v2, s1 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00]
// VI-ERR: instruction not supported on this GPU
scratch_store_dword off, v2, s1 offset:12
+// GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00]
// GFX9: scratch_store_dword off, v2, s1 offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00]
// VI-ERR: error: not a valid operand
// FIXME: Should error about multiple offsets
scratch_load_dword v1, v2, s1
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: error: invalid operand for instruction
// VI-ERR: error: invalid operand for instruction
scratch_load_dword v1, v2, s1 offset:32
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: error: invalid operand for instruction
// VI-ERR: error: not a valid operand
scratch_store_dword v1, v2, s1
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: error: invalid operand for instruction
// VI-ERR: error: invalid operand for instruction
scratch_store_dword v1, v2, s1 offset:32
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: error: invalid operand for instruction
// VI-ERR: error: not a valid operand
scratch_load_dword v1, off, exec_hi
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: error: invalid operand for instruction
// VI-ERR: error: invalid operand for instruction
scratch_store_dword off, v2, exec_hi
+// GFX10-ERR: error: invalid operand for instruction
// GFX9-ERR: error: invalid operand for instruction
// VI-ERR: error: invalid operand for instruction
scratch_load_dword v1, off, exec_lo
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7e,0x01]
// GFX9: scratch_load_dword v1, off, exec_lo ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7e,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_store_dword off, v2, exec_lo
+// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00]
// GFX9: scratch_store_dword off, v2, exec_lo ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00]
// VI-ERR: instruction not supported on this GPU
scratch_load_dword v1, off, m0
+// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7c,0x01]
// GFX9: scratch_load_dword v1, off, m0 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7c,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_store_dword off, v2, m0
+// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00]
// GFX9: scratch_store_dword off, v2, m0 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00]
// VI-ERR: instruction not supported on this GPU
scratch_load_ubyte_d16 v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_ubyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_load_ubyte_d16_hi v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_ubyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_load_sbyte_d16 v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_sbyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_load_sbyte_d16_hi v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_sbyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_load_short_d16 v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_short_d16 v1, v2, off ; encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_load_short_d16_hi v1, v2, off
+// GFX10: encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7d,0x01]
// GFX9: scratch_load_short_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7f,0x01]
// VI-ERR: instruction not supported on this GPU
scratch_store_byte_d16_hi off, v2, s1
+// GFX10: encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00]
// GFX9: scratch_store_byte_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00]
// VI-ERR: instruction not supported on this GPU
scratch_store_short_d16_hi off, v2, s1
+// GFX10: encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00]
// GFX9: scratch_store_short_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00]
// VI-ERR: instruction not supported on this GPU