[llvm] r361688 - Revert r361644, "[AMDGPU] Divergence driven ISel. Assign register class for cross block values according to the divergence."
Peter Collingbourne via llvm-commits
llvm-commits at lists.llvm.org
Fri May 24 18:52:38 PDT 2019
Author: pcc
Date: Fri May 24 18:52:38 2019
New Revision: 361688
URL: http://llvm.org/viewvc/llvm-project?rev=361688&view=rev
Log:
Revert r361644, "[AMDGPU] Divergence driven ISel. Assign register class for cross block values according to the divergence."
Broke sanitizer bots:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux/builds/21694/steps/bootstrap%20clang/logs/stdio
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/32478/steps/check-llvm%20asan/logs/stdio
Modified:
llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h
llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/include/llvm/CodeGen/TargetRegisterInfo.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll
llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
llvm/trunk/test/CodeGen/AMDGPU/branch-uniformity.ll
llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
llvm/trunk/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
llvm/trunk/test/CodeGen/AMDGPU/fabs.ll
llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll
llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll
llvm/trunk/test/CodeGen/AMDGPU/fsub.ll
llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll
llvm/trunk/test/CodeGen/AMDGPU/madak.ll
llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll
llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
Modified: llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h (original)
+++ llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h Fri May 24 18:52:38 2019
@@ -13,6 +13,7 @@
#ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
#define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
+
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
@@ -20,7 +21,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -57,7 +57,6 @@ public:
const TargetLowering *TLI;
MachineRegisterInfo *RegInfo;
BranchProbabilityInfo *BPI;
- const LegacyDivergenceAnalysis *DA;
/// CanLowerReturn - true iff the function's return value can be lowered to
/// registers.
bool CanLowerReturn;
@@ -199,11 +198,9 @@ public:
return ValueMap.count(V);
}
- unsigned CreateReg(MVT VT, bool isDivergent = false);
-
- unsigned CreateRegs(const Value *V);
+ unsigned CreateReg(MVT VT);
- unsigned CreateRegs(Type *Ty, bool isDivergent = false);
+ unsigned CreateRegs(Type *Ty);
unsigned InitializeRegForValue(const Value *V) {
// Tokens never live in vregs.
@@ -212,7 +209,7 @@ public:
unsigned &R = ValueMap[V];
assert(R == 0 && "Already initialized this value register!");
assert(VirtReg2Value.empty());
- return R = CreateRegs(V);
+ return R = CreateRegs(V->getType());
}
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Fri May 24 18:52:38 2019
@@ -406,7 +406,6 @@ public:
const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
- const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
LLVMContext *getContext() const {return Context; }
OptimizationRemarkEmitter &getORE() const { return *ORE; }
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Fri May 24 18:52:38 2019
@@ -636,21 +636,12 @@ public:
/// Return the register class that should be used for the specified value
/// type.
- virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
- (void)isDivergent;
+ virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
assert(RC && "This value type is not natively supported!");
return RC;
}
- /// Allows target to decide about the register class of the
- /// specific value that is live outside the defining block.
- /// Returns true if the value needs uniform register class.
- virtual bool requiresUniformRegister(MachineFunction &MF,
- const Value *) const {
- return false;
- }
-
/// Return the 'representative' register class for the specified value
/// type.
///
Modified: llvm/trunk/include/llvm/CodeGen/TargetRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetRegisterInfo.h?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetRegisterInfo.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetRegisterInfo.h Fri May 24 18:52:38 2019
@@ -520,11 +520,6 @@ public:
/// function. Used by MachineRegisterInfo::isConstantPhysReg().
virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
- /// Returns true if the register class is considered divergent.
- virtual bool isDivergentRegClass(const TargetRegisterClass *RC) const {
- return false;
- }
-
/// Physical registers that may be modified within a function but are
/// guaranteed to be restored before any uses. This is useful for targets that
/// have call sequences where a GOT register may be updated by the caller
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri May 24 18:52:38 2019
@@ -13919,11 +13919,9 @@ struct LoadedSlice {
assert(DAG && "Missing context");
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
EVT ResVT = Use->getValueType(0);
- const TargetRegisterClass *ResRC =
- TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
+ const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
const TargetRegisterClass *ArgRC =
- TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
- Use->getOperand(0)->isDivergent());
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp Fri May 24 18:52:38 2019
@@ -85,7 +85,6 @@ void FunctionLoweringInfo::set(const Fun
RegInfo = &MF->getRegInfo();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
unsigned StackAlign = TFI->getStackAlignment();
- DA = DAG->getDivergenceAnalysis();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -346,9 +345,9 @@ void FunctionLoweringInfo::clear() {
}
/// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
return RegInfo->createVirtualRegister(
- MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
+ MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
}
/// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -358,7 +357,7 @@ unsigned FunctionLoweringInfo::CreateReg
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
SmallVector<EVT, 4> ValueVTs;
@@ -371,18 +370,13 @@ unsigned FunctionLoweringInfo::CreateReg
unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
- unsigned R = CreateReg(RegisterVT, isDivergent);
+ unsigned R = CreateReg(RegisterVT);
if (!FirstReg) FirstReg = R;
}
}
return FirstReg;
}
-unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
- return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
- DA->isDivergent(V));
-}
-
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
/// the register's LiveOutInfo is for a smaller bit width, it is extended to
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Fri May 24 18:52:38 2019
@@ -105,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned R
// Stick to the preferred register classes for legal types.
if (TLI->isTypeLegal(VT))
- UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
+ UseRC = TLI->getRegClassFor(VT);
if (!IsClone && !IsCloned)
for (SDNode *User : Node->uses()) {
@@ -164,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned R
"Incompatible phys register def and uses!");
DstRC = UseRC;
} else {
- DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
+ DstRC = TLI->getRegClassFor(VT);
}
// If all uses are reading from the src physical register and copying the
@@ -225,9 +225,8 @@ void InstrEmitter::CreateVirtualRegister
// type correctly. For example, a 64-bit float (X86::FR64) can't live in
// the 32-bit float super-class (X86::FR32).
if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
- const TargetRegisterClass *VTRC = TLI->getRegClassFor(
- Node->getSimpleValueType(i),
- (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
+ const TargetRegisterClass *VTRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(i));
if (RC)
VTRC = TRI->getCommonSubClass(RC, VTRC);
if (VTRC)
@@ -290,8 +289,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
// IMPLICIT_DEF can produce any type of result so its MCInstrDesc
// does not include operand register class info.
if (!VReg) {
- const TargetRegisterClass *RC = TLI->getRegClassFor(
- Op.getSimpleValueType(), Op.getNode()->isDivergent());
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(Op.getSimpleValueType());
VReg = MRI->createVirtualRegister(RC);
}
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
@@ -396,15 +395,11 @@ void InstrEmitter::AddOperand(MachineIns
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
unsigned VReg = R->getReg();
MVT OpVT = Op.getSimpleValueType();
+ const TargetRegisterClass *OpRC =
+ TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
const TargetRegisterClass *IIRC =
II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
: nullptr;
- const TargetRegisterClass *OpRC =
- TLI->isTypeLegal(OpVT)
- ? TLI->getRegClassFor(OpVT,
- Op.getNode()->isDivergent() ||
- (IIRC && TRI->isDivergentRegClass(IIRC)))
- : nullptr;
if (OpRC && IIRC && OpRC != IIRC &&
TargetRegisterInfo::isVirtualRegister(VReg)) {
@@ -469,7 +464,7 @@ void InstrEmitter::AddOperand(MachineIns
}
unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- MVT VT, bool isDivergent, const DebugLoc &DL) {
+ MVT VT, const DebugLoc &DL) {
const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
@@ -484,7 +479,7 @@ unsigned InstrEmitter::ConstrainForSubRe
// VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
// register instead.
- RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
assert(RC && "No legal register class for VT supports that SubIdx");
unsigned NewReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
@@ -519,7 +514,7 @@ void InstrEmitter::EmitSubregNode(SDNode
// classes.
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
const TargetRegisterClass *TRC =
- TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
+ TLI->getRegClassFor(Node->getSimpleValueType(0));
unsigned Reg;
MachineInstr *DefMI;
@@ -553,7 +548,8 @@ void InstrEmitter::EmitSubregNode(SDNode
if (TargetRegisterInfo::isVirtualRegister(Reg))
Reg = ConstrainForSubReg(Reg, SubIdx,
Node->getOperand(0).getSimpleValueType(),
- Node->isDivergent(), Node->getDebugLoc());
+ Node->getDebugLoc());
+
// Create the destreg if it is missing.
if (VRBase == 0)
VRBase = MRI->createVirtualRegister(TRC);
@@ -588,8 +584,7 @@ void InstrEmitter::EmitSubregNode(SDNode
//
// There is no constraint on the %src register class.
//
- const TargetRegisterClass *SRC =
- TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
+ const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h Fri May 24 18:52:38 2019
@@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitt
/// supports SubIdx sub-registers. Emit a copy if that isn't possible.
/// Return the virtual register to use.
unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
- bool isDivergent, const DebugLoc &DL);
+ const DebugLoc &DL);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Fri May 24 18:52:38 2019
@@ -9844,7 +9844,7 @@ SelectionDAGBuilder::HandlePHINodesInSuc
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
- RegOut = FuncInfo.CreateRegs(C);
+ RegOut = FuncInfo.CreateRegs(C->getType());
CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
@@ -9857,7 +9857,7 @@ SelectionDAGBuilder::HandlePHINodesInSuc
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
- Reg = FuncInfo.CreateRegs(PHIOp);
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Fri May 24 18:52:38 2019
@@ -1485,7 +1485,7 @@ void SelectionDAGISel::SelectAllBasicBlo
!Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
if (!R)
- R = FuncInfo->CreateRegs(Inst);
+ R = FuncInfo->CreateRegs(Inst->getType());
}
bool HadTailCall = false;
Modified: llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp Fri May 24 18:52:38 2019
@@ -302,6 +302,18 @@ static bool foldVGPRCopyIntoRegSequence(
return true;
}
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+ const MachineRegisterInfo &MRI,
+ const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII) {
+ for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+ unsigned Reg = PHI.getOperand(i).getReg();
+ if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+ return true;
+ }
+ return false;
+}
+
static bool phiHasBreakDef(const MachineInstr &PHI,
const MachineRegisterInfo &MRI,
SmallSet<unsigned, 8> &Visited) {
@@ -326,6 +338,16 @@ static bool phiHasBreakDef(const Machine
return false;
}
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI) {
+ for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+ E = MBB.end(); I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+ return true;
+ }
+ return false;
+}
+
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
const MachineInstr *MoveImm,
const SIInstrInfo *TII,
@@ -387,6 +409,12 @@ bool searchPredecessors(const MachineBas
return false;
}
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI) {
+ return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+ return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
// Checks if there is potential path From instruction To instruction.
// If CutOff is specified and it sits in between of that path we ignore
// a higher portion of the path and report it is not reachable.
@@ -593,73 +621,63 @@ bool SIFixSGPRCopies::runOnMachineFuncti
break;
}
case AMDGPU::PHI: {
- unsigned hasVGPRUses = 0;
- SetVector<const MachineInstr *> worklist;
- worklist.insert(&MI);
- while (!worklist.empty()) {
- const MachineInstr *Instr = worklist.pop_back_val();
- unsigned Reg = Instr->getOperand(0).getReg();
- for (const auto &Use : MRI.use_operands(Reg)) {
- const MachineInstr *UseMI = Use.getParent();
- if (UseMI->isCopy() || UseMI->isRegSequence()) {
- if (UseMI->isCopy() &&
- TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
- !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
- hasVGPRUses++;
- }
- worklist.insert(UseMI);
- continue;
- }
-
- if (UseMI->isPHI()) {
- if (!TRI->isSGPRReg(MRI, Use.getReg()))
- hasVGPRUses++;
- continue;
- }
-
- unsigned OpNo = UseMI->getOperandNo(&Use);
- const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
- if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) {
- const TargetRegisterClass *OpRC =
- TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
- if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
- OpRC != &AMDGPU::VS_64RegClass) {
- hasVGPRUses++;
- }
- }
- }
- }
- bool hasVGPRInput = false;
- for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
- unsigned InputReg = MI.getOperand(i).getReg();
- MachineInstr *Def = MRI.getVRegDef(InputReg);
- if (TRI->isVGPR(MRI, InputReg)) {
- if (Def->isCopy()) {
- unsigned SrcReg = Def->getOperand(1).getReg();
- const TargetRegisterClass *RC =
- TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
- : TRI->getPhysRegClass(SrcReg);
- if (TRI->isSGPRClass(RC))
- continue;
- }
- hasVGPRInput = true;
- break;
- } else if (Def->isCopy() &&
- TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
- hasVGPRInput = true;
+ unsigned Reg = MI.getOperand(0).getReg();
+ if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+ break;
+
+ // We don't need to fix the PHI if the common dominator of the
+ // two incoming blocks terminates with a uniform branch.
+ bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+ if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
+ MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+ MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+ if (!predsHasDivergentTerminator(MBB0, TRI) &&
+ !predsHasDivergentTerminator(MBB1, TRI)) {
+ LLVM_DEBUG(dbgs()
+ << "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
}
- unsigned PHIRes = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
- if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
- (hasVGPRInput || hasVGPRUses > 1)) {
- TII->moveToVALU(MI);
- } else {
- TII->legalizeOperands(MI, MDT);
+ // If a PHI node defines an SGPR and any of its operands are VGPRs,
+ // then we need to move it to the VALU.
+ //
+ // Also, if a PHI node defines an SGPR and has all SGPR operands
+ // we must move it to the VALU, because the SGPR operands will
+ // all end up being assigned the same register, which means
+ // there is a potential for a conflict if different threads take
+ // different control flow paths.
+ //
+ // For Example:
+ //
+ // sgpr0 = def;
+ // ...
+ // sgpr1 = def;
+ // ...
+ // sgpr2 = PHI sgpr0, sgpr1
+ // use sgpr2;
+ //
+ // Will Become:
+ //
+ // sgpr2 = def;
+ // ...
+ // sgpr2 = def;
+ // ...
+ // use sgpr2
+ //
+ // The one exception to this rule is when one of the operands
+ // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+ // instruction. In this case, there we know the program will
+ // never enter the second block (the loop) without entering
+ // the first block (where the condition is computed), so there
+ // is no chance for values to be over-written.
+
+ SmallSet<unsigned, 8> Visited;
+ if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
+ LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
+ TII->moveToVALU(MI, MDT);
}
-
break;
}
case AMDGPU::REG_SEQUENCE:
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri May 24 18:52:38 2019
@@ -9637,8 +9637,7 @@ SDNode *SITargetLowering::PostISelFoldin
break;
MVT VT = Src0.getValueType().getSimpleVT();
- const TargetRegisterClass *RC =
- getRegClassFor(VT, Src0.getNode()->isDivergent());
+ const TargetRegisterClass *RC = getRegClassFor(VT);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
@@ -10172,91 +10171,3 @@ SITargetLowering::shouldExpandAtomicRMWI
return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}
-
-const TargetRegisterClass *
-SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
- const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
- const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
- if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
- return &AMDGPU::SReg_64RegClass;
- if (!TRI->isSGPRClass(RC) && !isDivergent)
- return TRI->getEquivalentSGPRClass(RC);
- else if (TRI->isSGPRClass(RC) && isDivergent)
- return TRI->getEquivalentVGPRClass(RC);
-
- return RC;
-}
-
-static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) {
- if (Visited.count(V))
- return false;
- Visited.insert(V);
- bool Result = false;
- for (auto U : V->users()) {
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
- if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) &&
- (V == U->getOperand(1)))
- Result = true;
- } else {
- Result = hasIfBreakUser(U, Visited);
- }
- if (Result)
- break;
- }
- return Result;
-}
-
-bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
- const Value *V) const {
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::amdgcn_if_break:
- return true;
- }
- }
- if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
- if (const IntrinsicInst *Intrinsic =
- dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::amdgcn_if:
- case Intrinsic::amdgcn_else: {
- ArrayRef<unsigned> Indices = ExtValue->getIndices();
- if (Indices.size() == 1 && Indices[0] == 1) {
- return true;
- }
- }
- }
- }
- }
- if (const CallInst *CI = dyn_cast<CallInst>(V)) {
- if (isa<InlineAsm>(CI->getCalledValue())) {
- const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
- ImmutableCallSite CS(CI);
- TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
- MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
- for (auto &TC : TargetConstraints) {
- if (TC.Type == InlineAsm::isOutput) {
- ComputeConstraintToUse(TC, SDValue());
- unsigned AssignedReg;
- const TargetRegisterClass *RC;
- std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
- SIRI, TC.ConstraintCode,
- getSimpleValueType(MF.getDataLayout(), CS.getType()));
- if (RC) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
- return true;
- else if (SIRI->isSGPRClass(RC))
- return true;
- }
- }
- }
- }
- }
- SetVector<const Value *> Visited;
- return hasIfBreakUser(V, Visited);
-}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Fri May 24 18:52:38 2019
@@ -367,10 +367,7 @@ public:
bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
- virtual const TargetRegisterClass *
- getRegClassFor(MVT VT, bool isDivergent) const override;
- virtual bool requiresUniformRegister(MachineFunction &MF,
- const Value *V) const override;
+
unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
};
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Fri May 24 18:52:38 2019
@@ -2219,10 +2219,6 @@ bool SIInstrInfo::FoldImmediate(MachineI
// These come before src2.
removeModOperands(UseMI);
UseMI.setDesc(get(NewOpc));
- // It might happen that UseMI was commuted
- // and we now have SGPR as SRC1. If so 2 inlined
- // constant and SGPR are illegal.
- legalizeOperands(UseMI);
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
if (DeleteDef)
@@ -3917,7 +3913,7 @@ void SIInstrInfo::legalizeGenericOperand
return;
// Try to eliminate the copy if it is copying an immediate value.
- if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
+ if (Def->isMoveImmediate())
FoldImmediate(*Copy, *Def, OpReg, &MRI);
}
@@ -4151,10 +4147,7 @@ void SIInstrInfo::legalizeOperands(Machi
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
if (!VRC) {
assert(SRC);
- if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
- VRC = &AMDGPU::VReg_1RegClass;
- } else
- VRC = RI.getEquivalentVGPRClass(SRC);
+ VRC = RI.getEquivalentVGPRClass(SRC);
}
RC = VRC;
} else {
@@ -5316,7 +5309,7 @@ const TargetRegisterClass *SIInstrInfo::
case AMDGPU::INSERT_SUBREG:
case AMDGPU::WQM:
case AMDGPU::WWM:
- if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+ if (RI.hasVGPRs(NewDstRC))
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Fri May 24 18:52:38 2019
@@ -195,11 +195,6 @@ public:
unsigned Reg) const;
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
- virtual bool
- isDivergentRegClass(const TargetRegisterClass *RC) const override {
- return !isSGPRClass(RC);
- }
-
bool isSGPRPressureSet(unsigned SetID) const {
return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
}
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri May 24 18:52:38 2019
@@ -1429,9 +1429,7 @@ EVT ARMTargetLowering::getSetCCResultTyp
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-const TargetRegisterClass *
-ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
- (void)isDivergent;
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Fri May 24 18:52:38 2019
@@ -456,8 +456,7 @@ class VectorType;
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- const TargetRegisterClass *
- getRegClassFor(MVT VT, bool isDivergent = false) const override;
+ const TargetRegisterClass *getRegClassFor(MVT VT) const override;
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
Modified: llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll Fri May 24 18:52:38 2019
@@ -5,12 +5,11 @@ define i32 @atomic_nand_i32_lds(i32 addr
; GCN-LABEL: atomic_nand_i32_lds:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: ds_read_b32 v1, v0
+; GCN-NEXT: ds_read_b32 v2, v0
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: BB0_1: ; %atomicrmw.start
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_not_b32_e32 v1, v2
; GCN-NEXT: v_or_b32_e32 v1, -5, v1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -18,6 +17,7 @@ define i32 @atomic_nand_i32_lds(i32 addr
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1_vol
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execnz BB0_1
@@ -33,12 +33,11 @@ define i32 @atomic_nand_i32_global(i32 a
; GCN-LABEL: atomic_nand_i32_global:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: global_load_dword v2, v[0:1], off
+; GCN-NEXT: global_load_dword v3, v[0:1], off
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: BB1_1: ; %atomicrmw.start
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_not_b32_e32 v2, v3
; GCN-NEXT: v_or_b32_e32 v2, -5, v2
; GCN-NEXT: s_waitcnt vmcnt(0)
@@ -46,6 +45,7 @@ define i32 @atomic_nand_i32_global(i32 a
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_wbinvl1_vol
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execnz BB1_1
@@ -61,12 +61,11 @@ define i32 @atomic_nand_i32_flat(i32* %p
; GCN-LABEL: atomic_nand_i32_flat:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: flat_load_dword v2, v[0:1]
+; GCN-NEXT: flat_load_dword v3, v[0:1]
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: BB2_1: ; %atomicrmw.start
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_not_b32_e32 v2, v3
; GCN-NEXT: v_or_b32_e32 v2, -5, v2
; GCN-NEXT: s_waitcnt vmcnt(0)
@@ -75,6 +74,7 @@ define i32 @atomic_nand_i32_flat(i32* %p
; GCN-NEXT: buffer_wbinvl1_vol
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execnz BB2_1
Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll Fri May 24 18:52:38 2019
@@ -99,7 +99,7 @@ bb3:
; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch:
; GCN: s_load_dword [[CND:s[0-9]+]]
-
+; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
; GCN-DAG: v_cmp_eq_f32_e64 [[UNMASKED:s\[[0-9]+:[0-9]+\]]], [[CND]], 0
; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]]
; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
@@ -117,7 +117,6 @@ bb3:
; GCN: v_nop_e64
; GCN: [[ENDBB]]:
-; GCN: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
; GCN: buffer_store_dword [[V_CND]]
; GCN: s_endpgm
define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(float addrspace(1)* %arg, float %cnd) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-uniformity.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-uniformity.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-uniformity.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-uniformity.ll Fri May 24 18:52:38 2019
@@ -8,8 +8,8 @@
;
; CHECK-LABEL: {{^}}main:
; CHECK: ; %LOOP49
-; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
-; CHECK: s_cbranch_scc1
+; CHECK: v_cmp_ne_u32_e32 vcc,
+; CHECK: s_cbranch_vccnz
; CHECK: ; %ENDIF53
define amdgpu_vs float @main(i32 %in) {
main_body:
Modified: llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll Fri May 24 18:52:38 2019
@@ -89,7 +89,7 @@ endif:
}
; GCN-LABEL: {{^}}divergent_loop:
-; VGPR: workitem_private_segment_byte_size = 12{{$}}
+; VGPR: workitem_private_segment_byte_size = 16{{$}}
; GCN: {{^}}; %bb.0:
@@ -123,9 +123,10 @@ endif:
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
-; GCN: s_cmp_lg_u32
+; GCN: v_cmp_ne_u32_e32 vcc,
+; GCN: s_and_b64 vcc, exec, vcc
; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN-NEXT: s_cbranch_scc1 [[LOOP]]
+; GCN-NEXT: s_cbranch_vccnz [[LOOP]]
; GCN: [[END]]:
Modified: llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll Fri May 24 18:52:38 2019
@@ -13,50 +13,55 @@ define amdgpu_ps void @main(i32, float)
; CHECK: ; %bb.0: ; %start
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 m0, s0
-; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
-; CHECK-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; CHECK-NEXT: s_mov_b64 s[2:3], 0
-; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5
+; CHECK-NEXT: v_cmp_nlt_f32_e64 s[0:1], 0, v0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
; CHECK-NEXT: BB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], exec
-; CHECK-NEXT: s_cmp_lt_u32 s0, 32
-; CHECK-NEXT: s_mov_b64 s[6:7], -1
-; CHECK-NEXT: s_cbranch_scc0 BB0_5
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1
+; CHECK-NEXT: s_and_b64 vcc, exec, vcc
+; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
+; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT: s_cbranch_vccz BB0_5
; CHECK-NEXT: ; %bb.2: ; %endif1
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_mov_b64 s[4:5], -1
-; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT: s_mov_b64 s[6:7], -1
+; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1]
+; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
; CHECK-NEXT: ; mask branch BB0_4
; CHECK-NEXT: BB0_3: ; %endif2
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_add_i32 s0, s0, 1
-; CHECK-NEXT: s_xor_b64 s[4:5], exec, -1
+; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
+; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
; CHECK-NEXT: BB0_4: ; %Flow1
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
-; CHECK-NEXT: s_mov_b64 s[6:7], 0
-; CHECK-NEXT: BB0_5: ; %Flow
+; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT: s_branch BB0_6
+; CHECK-NEXT: BB0_5: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: ; implicit-def: $vgpr1
+; CHECK-NEXT: BB0_6: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_and_b64 s[8:9], exec, s[4:5]
-; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[8:9]
+; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
+; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
; CHECK-NEXT: s_cbranch_execnz BB0_1
-; CHECK-NEXT: ; %bb.6: ; %Flow2
+; CHECK-NEXT: ; %bb.7: ; %Flow2
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[6:7]
-; CHECK-NEXT: ; mask branch BB0_8
-; CHECK-NEXT: BB0_7: ; %if1
+; this is the divergent branch with the condition not marked as divergent
+; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
+; CHECK-NEXT: ; mask branch BB0_9
+; CHECK-NEXT: BB0_8: ; %if1
; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT: BB0_8: ; %endloop
+; CHECK-NEXT: BB0_9: ; %endloop
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
; CHECK-NEXT: s_endpgm
-; this is the divergent branch with the condition not marked as divergent
start:
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
br label %loop
Modified: llvm/trunk/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll Fri May 24 18:52:38 2019
@@ -13,9 +13,9 @@ define amdgpu_hs void @main([0 x i8] add
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN: [[DEF1:%[0-9]+]]:sreg_128 = IMPLICIT_DEF
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, killed [[COPY1]], %subreg.sub2
; GCN: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
; GCN: [[DEF2:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
Modified: llvm/trunk/test/CodeGen/AMDGPU/fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fabs.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fabs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fabs.ll Fri May 24 18:52:38 2019
@@ -48,8 +48,8 @@ define amdgpu_kernel void @s_fabs_f32(fl
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; GCN: s_and_b32
-; GCN: s_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@@ -62,10 +62,10 @@ define amdgpu_kernel void @fabs_v2f32(<2
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; GCN: s_and_b32
-; GCN: s_and_b32
-; GCN: s_and_b32
-; GCN: s_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
define amdgpu_kernel void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
store <4 x float> %fabs, <4 x float> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll Fri May 24 18:52:38 2019
@@ -85,15 +85,15 @@ define amdgpu_kernel void @div_minus_1_b
; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
-; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -121,15 +121,15 @@ define amdgpu_kernel void @div_v4_1_by_x
}
; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
-; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
@@ -156,15 +156,15 @@ define amdgpu_kernel void @div_v4_minus_
}
; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
-; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
@@ -194,15 +194,15 @@ define amdgpu_kernel void @div_v4_1_by_m
; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
-; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -231,6 +231,8 @@ define amdgpu_kernel void @div_v4_minus_
}
; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp:
+; GCN-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
@@ -238,12 +240,9 @@ define amdgpu_kernel void @div_v4_minus_
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
-; GCN-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
-; GCN-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-
-; GCN-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -274,6 +273,8 @@ define amdgpu_kernel void @div_v4_c_by_x
}
; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp:
+; GCN-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
@@ -281,12 +282,9 @@ define amdgpu_kernel void @div_v4_c_by_x
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
-; GCN-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
-; GCN-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-
-; GCN-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll Fri May 24 18:52:38 2019
@@ -33,13 +33,9 @@ define amdgpu_kernel void @s_test_fmin_l
; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
+; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
-; GCN-NONAN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
-
-; VI-SAFE: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
-
-; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, s[[B]], [[VA]]
+; SI-SAFE: v_min_legacy_f32_e64 {{v[0-9]+}}, [[VB]], s[[A]]
; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[A]], [[VB]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll Fri May 24 18:52:38 2019
@@ -4,7 +4,7 @@
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
; SI-NOT: and
-; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+; SI: v_sub_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{s[0-9]+}}|
define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
@@ -15,7 +15,7 @@ define amdgpu_kernel void @fneg_fabs_fad
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
; SI-NOT: and
-; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
+; SI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{s[0-9]+}}|
; SI-NOT: and
define amdgpu_kernel void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
@@ -85,8 +85,8 @@ define amdgpu_kernel void @v_fneg_fabs_f
; FIXME: In this case two uses of the constant should be folded
; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define amdgpu_kernel void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -96,10 +96,10 @@ define amdgpu_kernel void @fneg_fabs_v2f
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define amdgpu_kernel void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
Modified: llvm/trunk/test/CodeGen/AMDGPU/fsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fsub.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fsub.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fsub.ll Fri May 24 18:52:38 2019
@@ -27,8 +27,8 @@ define amdgpu_kernel void @s_fsub_f32(fl
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
%sub = fsub <2 x float> %a, %b
store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
@@ -55,10 +55,10 @@ define amdgpu_kernel void @v_fsub_v4f32(
}
; FUNC-LABEL: {{^}}s_fsub_v4f32:
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: s_endpgm
define amdgpu_kernel void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
%result = fsub <4 x float> %a, %b
Modified: llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll Fri May 24 18:52:38 2019
@@ -4,11 +4,17 @@
; SI-LABEL: {{^}}i1_copy_from_loop:
;
; SI: ; %for.body
-; SI: v_cmp_lt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], s{{[0-9+]}}, 4
+; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
+; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
+; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
+; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]
+
+; SI: ; %Flow1
+; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec
; SI: ; %Flow
; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
-; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
+; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
; SI: ; %for.end
Modified: llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll Fri May 24 18:52:38 2019
@@ -7,6 +7,7 @@
; GCN: s_cbranch_scc1 [[PREEXIT:BB[0-9_]+]]
; GCN: ; %blocka
+; GCN: s_xor_b64 s[{{[0-9:]+}}], exec, -1
; GCN: s_cmp_eq_u32 s1, 0
; GCN: s_cbranch_scc1 [[EXIT:BB[0-9_]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll Fri May 24 18:52:38 2019
@@ -11,12 +11,12 @@
; GCN-LABEL: {{^}}insertelement_v4f32_0:
; GCN: s_load_dwordx4
-; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
-; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
-
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
+; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
define amdgpu_kernel void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll Fri May 24 18:52:38 2019
@@ -387,7 +387,7 @@ define amdgpu_kernel void @test_div_scal
; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
; SI-NOT: v0
-; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
+; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, v0, v0, v0
define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 {
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll Fri May 24 18:52:38 2019
@@ -53,8 +53,8 @@ define amdgpu_kernel void @test_fabs_fme
}
; GCN-LABEL: {{^}}test_fneg_fmed3_rr_0:
-; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
-; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
+; GCN: s_brev_b32 [[NEG0:s[0-9]+]], 1
+; GCN: v_med3_f32 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
define amdgpu_kernel void @test_fneg_fmed3_rr_0(float addrspace(1)* %out, float %src0, float %src1) #1 {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0)
%neg.med3 = fsub float -0.0, %med3
@@ -88,8 +88,8 @@ define amdgpu_kernel void @test_fneg_fme
; GCN-LABEL: {{^}}test_fneg_fmed3_r_inv2pi_0_foldable_user:
; GCN-DAG: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
-; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983
-; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
+; GCN-DAG: s_mov_b32 [[NEG_INV:s[0-9]+]], 0xbe22f983
+; GCN: v_med3_f32 [[MED3:v[0-9]+]], -v{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]]
define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(float addrspace(1)* %out, float %src0, float %mul.arg) #1 {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll Fri May 24 18:52:38 2019
@@ -42,8 +42,6 @@ define amdgpu_kernel void @dpp_wait_stat
; VI-OPT: s_mov_b32
; VI-OPT: s_mov_b32
; VI-NOOPT: s_waitcnt
-; VI-NOOPT-NEXT: v_mov_b32_e32
-; VI-NOOPT-NEXT: s_nop 0
; VI-NOOPT-NEXT: s_nop 0
; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI-OPT: s_nop 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll Fri May 24 18:52:38 2019
@@ -4,7 +4,7 @@
declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
-; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; GCN-DAG: v_mov_b32_e32 v5, v1
; GCN-DAG: v_mov_b32_e32 v4, v0
define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll Fri May 24 18:52:38 2019
@@ -4,7 +4,7 @@
declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
-; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; GCN-DAG: v_mov_b32_e32 v5, v1
; GCN-DAG: v_mov_b32_e32 v4, v0
define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll Fri May 24 18:52:38 2019
@@ -26,9 +26,10 @@
; GCN: s_mov_b64 [[OUTER_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
-; GCN: s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
-; GCN: s_cmp_gt_i32 s4, -1
-; GCN: s_cbranch_scc1 [[FLOW:BB[0-9]+_[0-9]+]]
+; GCN: v_cmp_lt_i32_e32 vcc, -1
+; GCN: s_and_b64 vcc, exec, vcc
+; GCN: s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
+; GCN: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]
; GCN: ; %bb4
; GCN: buffer_load_dword
@@ -38,7 +39,6 @@
; GCN: s_or_b64 [[INNER_MASK]], [[INNER_MASK]], [[TMP0]]
; GCN: [[FLOW]]: ; %Flow
-; GCN: ; in Loop: Header=BB0_1 Depth=1
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[OUTER_MASK]]
; GCN: s_mov_b64 [[OUTER_MASK]], [[TMP1]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/madak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madak.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madak.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madak.ll Fri May 24 18:52:38 2019
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,MAD,GFX10-MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -17,7 +17,6 @@ declare float @llvm.fabs.f32(float) noun
; GFX10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GFX10: {{flat|global}}_load_dword [[VB:v[0-9]+]]
; MAD: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
; FMA: v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -80,7 +79,6 @@ define amdgpu_kernel void @madak_2_use_f
; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
; GCN: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]]
; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -108,7 +106,6 @@ define amdgpu_kernel void @madak_m_inlin
; GFX10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GFX10: {{flat|global}}_load_dword [[VB:v[0-9]+]]
; MAD: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
; FMA: v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -237,12 +234,9 @@ define amdgpu_kernel void @no_madak_src1
; On GFX10+ we can use two scalar operands.
; GCN-LABEL: {{^}}madak_constant_bus_violation:
; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x12|0x48}}
-
+; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
; GCN: {{buffer|flat|global}}_load_dword [[VGPR:v[0-9]+]]
-; MAD: v_mov_b32_e32 [[MADAK:v[0-9]+]], 0x42280000
-; MAD: v_mac_f32_e64 [[MADAK]], [[SGPR0]], 0.5
-; GFX10: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
-; GFX10-MAD: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; MAD: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
; FMA: v_fmaak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
; GFX6: buffer_store_dword [[MUL]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll Fri May 24 18:52:38 2019
@@ -155,9 +155,8 @@ entry:
; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
; CHECK-O0: s_xor_b64 exec, exec, [[CMP]]
; CHECK-O0-NEXT: s_cbranch_execnz [[LOOPBB0]]
-; CHECK-O0: v_readlane_b32 s[[S1:[0-9]+]], v{{[0-9]+}}, 4
-; CHECK-O0: v_readlane_b32 s[[S2:[0-9]+]], v{{[0-9]+}}, 5
-; CHECK-O0: s_mov_b64 exec, s{{\[}}[[S1]]:[[S2]]{{\]}}
+
+; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]]
; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
; CHECK-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll Fri May 24 18:52:38 2019
@@ -96,6 +96,7 @@ ENDIF:
; GCN: s_mov_b64 [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
; GCN: ; %LeafBlock1
+; GCN: s_mov_b64
; GCN: s_mov_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN: ; %case1
@@ -108,6 +109,8 @@ ENDIF:
; GCN: s_mov_b64 [[BREAK]], -1{{$}}
+; GCN: [[FLOW]]: ; %Flow
+
; GCN: ; %case0
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]],
; GCN-DAG: s_andn2_b64 [[BREAK]], [[BREAK]], exec
@@ -115,7 +118,7 @@ ENDIF:
; GCN-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec
; GCN: s_or_b64 [[BREAK]], [[BREAK]], [[TMP]]
-; GCN: [[FLOW]]: ; %Flow4
+; GCN: ; %Flow4
; GCN: s_and_b64 [[BREAK]], exec, [[BREAK]]
; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT]]
; GCN: s_andn2_b64 exec, exec, [[LEFT]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll Fri May 24 18:52:38 2019
@@ -135,8 +135,8 @@ define amdgpu_kernel void @opt_select_i6
; GCN-LABEL: {{^}}regression:
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
-; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
-; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
+; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
+; GCN: v_cmp_eq_f32_e32 vcc, 0, v{{[0-9]+}}
define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
entry:
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll Fri May 24 18:52:38 2019
@@ -104,8 +104,7 @@ endif:
; SI: ; %else
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
-; SI: v_cmp_gt_i32_e32 vcc, 0, [[AVAL]]
-; SI: s_and_b64 [[PHI:s\[[0-9]+:[0-9]+\]]], vcc, exec
+; SI: v_cmp_gt_i32_e64 [[PHI:s\[[0-9]+:[0-9]+\]]], 0, [[AVAL]]
; SI: ; %if
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir Fri May 24 18:52:38 2019
@@ -16,7 +16,7 @@ registers:
body: |
; GCN-LABEL: name: phi_visit_order
- ; GCN: S_ADD_I32
+ ; GCN: V_ADD_I32
bb.0:
liveins: $vgpr0
%7 = COPY $vgpr0
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd.ll Fri May 24 18:52:38 2019
@@ -571,6 +571,7 @@ main_body:
;
; TODO: we should keep the loop counter in an SGPR
;
+; GCN: v_readfirstlane_b32
; GCN: s_buffer_load_dword
define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) #0 {
main_body:
Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll Fri May 24 18:52:38 2019
@@ -1,43 +1,28 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
; Don't crash when the use of an undefined value is only detected by the
; register coalescer because it is hidden with subregister insert/extract.
target triple="amdgcn--"
-define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
; CHECK-LABEL: foobar:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
-; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CHECK-NEXT: s_mov_b32 s2, -1
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-
-; FIXME: The change related to the fact that
-; DetectDeadLanes pass hit "Copy across incompatible class" SGPR -> VGPR in analysis
-; and hence it cannot derive the fact that the vector element is unused.
-; Such a copies appear because the float4 vectors and their elements in the test are uniform
-; but the PHI node in "ife" block is divergent because of the CF dependency (divergent branch in bb0)
+; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
+; CHECK-NEXT: s_mov_b32 s2, -1
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v1, s5
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT: v_mov_b32_e32 v0, s4
-; CHECK-NEXT: v_mov_b32_e32 v1, s5
-; CHECK-NEXT: v_mov_b32_e32 v2, s6
-; CHECK-NEXT: v_mov_b32_e32 v3, s7
+; CHECK: BB0_1:
+; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec
+; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; CHECK-NEXT: ; mask branch BB0_2
-; CHECK-NEXT: BB0_1: ; %ift
-; CHECK-NEXT: s_mov_b32 s4, s5
-; CHECK-NEXT: v_mov_b32_e32 v0, s4
-; CHECK-NEXT: v_mov_b32_e32 v1, s5
-; CHECK-NEXT: v_mov_b32_e32 v2, s6
-; CHECK-NEXT: v_mov_b32_e32 v3, s7
-; CHECK-NEXT: BB0_2: ; %ife
-; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
-; CHECK-NEXT: s_mov_b32 s3, 0xf000
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
-; CHECK-NEXT: s_endpgm
+; CHECK: BB0_2:
+; CHECK: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_mov_b32 s3, 0xf000
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; CHECK-NEXT: s_endpgm
+define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
entry:
%v0 = insertelement <4 x float> undef, float %a0, i32 0
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll Fri May 24 18:52:38 2019
@@ -7,9 +7,10 @@
; CHECK: s_and_saveexec_b64
; CHECK-NEXT: ; mask branch
; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}}
+; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader
-; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: ; %loop_body
-; CHECK: s_cbranch_scc0 [[LOOP_BODY_LABEL]]
+; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]:
+; CHECK: s_cbranch_vccz [[LOOP_BODY_LABEL]]
; CHECK: s_endpgm
define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll Fri May 24 18:52:38 2019
@@ -226,12 +226,13 @@ define amdgpu_kernel void @test_literal_
; GCN-LABEL: {{^}}test_s0_s1_k_f32:
; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
+; GCN-DAG: s_mov_b32 [[SK0:s[0-9]+]], 0x44800000
; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], s[[SGPR1]]
+; GCN-DAG: v_mov_b32_e32 [[VS0:v[0-9]+]], s[[SGPR0]]
-; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK0]]
-; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
-; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK1]]
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VS0]], [[VS1]], [[SK0]]
+; GCN-DAG: s_mov_b32 [[SK1:s[0-9]+]], 0x45800000
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VS0]], [[VS1]], [[SK1]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll Fri May 24 18:52:38 2019
@@ -165,8 +165,8 @@ exit:
; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
; SI: buffer_load_dword
; SI-DAG: buffer_store_dword
-; SI-DAG: s_cmpk_eq_i32 s{{[0-9+]}}, 0x100
-; SI: s_cbranch_scc0 [[LABEL_LOOP]]
+; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100
+; SI: s_cbranch_vccz [[LABEL_LOOP]]
; SI: [[LABEL_EXIT]]:
; SI: s_endpgm
@@ -214,7 +214,7 @@ exit:
; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
-; SI: ; mask branch [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
+; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
; SI: buffer_store_dword
Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll?rev=361688&r1=361687&r2=361688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll Fri May 24 18:52:38 2019
@@ -1,4 +1,3 @@
-; XFAIL: *
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s
More information about the llvm-commits mailing list