[llvm] r280970 - AMDGPU: Support commuting with immediate in src0
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 8 10:19:31 PDT 2016
Author: arsenm
Date: Thu Sep 8 12:19:29 2016
New Revision: 280970
URL: http://llvm.org/viewvc/llvm-project?rev=280970&view=rev
Log:
AMDGPU: Support commuting with immediate in src0
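
Previously, findCommutedOpIndices rejected any instruction whose src0 was not a
register, so an immediate could only be commuted out of src1. With this change,
commuteInstructionImpl handles three cases: two register operands still go
through TargetInstrInfo::commuteInstructionImpl, a register paired with a
non-register operand (in either order) is handled by the new
swapRegAndNonRegOperand helper, and the source modifiers are swapped afterwards
by swapSourceModifiers. Roughly, the new dispatch looks like the following
minimal sketch (toy operand types, not the real MachineOperand API; the
isOperandLegal checks and modifier/opcode bookkeeping are omitted):

    #include <cstdint>
    #include <utility>

    struct Operand {
      bool IsReg;
      unsigned Reg;   // meaningful when IsReg
      int64_t Imm;    // meaningful when !IsReg
    };

    // Mirrors the idea of swapRegAndNonRegOperand: the register moves into the
    // immediate's slot and the immediate value into the register's slot.
    static void swapRegAndImm(Operand &RegOp, Operand &ImmOp) {
      int64_t ImmVal = ImmOp.Imm;
      ImmOp = Operand{true, RegOp.Reg, 0};
      RegOp = Operand{false, 0, ImmVal};
    }

    // Returns false when the operand pair cannot be commuted.
    bool commuteSrcOperands(Operand &Src0, Operand &Src1) {
      if (Src0.IsReg && Src1.IsReg)
        std::swap(Src0, Src1);       // reg/reg: plain swap
      else if (Src0.IsReg && !Src1.IsReg)
        swapRegAndImm(Src0, Src1);   // reg/imm: src0 accepts any operand type
      else if (!Src0.IsReg && Src1.IsReg)
        swapRegAndImm(Src1, Src0);   // imm/reg: the new case this patch handles
      else
        return false;                // two non-register operands
      return true;
    }

The effect shows up throughout the test updates below: in commute_modifiers.ll,
for example, the expected output changes from
v_add_f32_e64 [[REG]], 2.0, |[[X]]| to v_add_f32_e64 [[REG]], |[[X]]|, 2.0,
with the immediate now commuted into src1.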
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll
llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
llvm/trunk/test/CodeGen/AMDGPU/commute_modifiers.ll
llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll
llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
llvm/trunk/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
llvm/trunk/test/CodeGen/AMDGPU/fabs.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/fabs.ll
llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll
llvm/trunk/test/CodeGen/AMDGPU/fmaxnum.ll
llvm/trunk/test/CodeGen/AMDGPU/fminnum.ll
llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll
llvm/trunk/test/CodeGen/AMDGPU/fsub64.ll
llvm/trunk/test/CodeGen/AMDGPU/imm.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll
llvm/trunk/test/CodeGen/AMDGPU/madak.ll
llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll
llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
llvm/trunk/test/CodeGen/AMDGPU/trunc.ll
llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Thu Sep 8 12:19:29 2016
@@ -497,9 +497,7 @@ void SIInstrInfo::copyPhysReg(MachineBas
}
}
-int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
- const unsigned Opcode = MI.getOpcode();
-
+int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
int NewOpc;
// Try to map original to commuted opcode
@@ -908,91 +906,89 @@ bool SIInstrInfo::expandPostRAPseudo(Mac
return true;
}
-/// Commutes the operands in the given instruction.
-/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
-///
-/// Do not call this method for a non-commutable instruction or for
-/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
-/// Even though the instruction is commutable, the method may still
-/// fail to commute the operands, null pointer is returned in such cases.
-MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
- unsigned OpIdx0,
- unsigned OpIdx1) const {
- int CommutedOpcode = commuteOpcode(MI);
- if (CommutedOpcode == -1)
- return nullptr;
+bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
+ MachineOperand &Src0,
+ unsigned Src0OpName,
+ MachineOperand &Src1,
+ unsigned Src1OpName) const {
+ MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
+ if (!Src0Mods)
+ return false;
- int Src0Idx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
- if (!Src0.isReg())
+ MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
+ assert(Src1Mods &&
+ "All commutable instructions have both src0 and src1 modifiers");
+
+ int Src0ModsVal = Src0Mods->getImm();
+ int Src1ModsVal = Src1Mods->getImm();
+
+ Src1Mods->setImm(Src0ModsVal);
+ Src0Mods->setImm(Src1ModsVal);
+ return true;
+}
+
+static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
+ MachineOperand &RegOp,
+ MachineOperand &ImmOp) {
+ // TODO: Handle other immediate like types.
+ if (!ImmOp.isImm())
return nullptr;
- int Src1Idx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
+ int64_t ImmVal = ImmOp.getImm();
+ ImmOp.ChangeToRegister(RegOp.getReg(), false, false,
+ RegOp.isKill(), RegOp.isDead(), RegOp.isUndef(),
+ RegOp.isDebug());
+ ImmOp.setSubReg(RegOp.getSubReg());
+ RegOp.ChangeToImmediate(ImmVal);
+ return &MI;
+}
+
+MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
+ unsigned Src0Idx,
+ unsigned Src1Idx) const {
+ assert(!NewMI && "this should never be used");
- if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
- OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
- (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
- OpIdx1 != static_cast<unsigned>(Src0Idx)))
+ unsigned Opc = MI.getOpcode();
+ int CommutedOpcode = commuteOpcode(Opc);
+ if (CommutedOpcode == -1)
return nullptr;
- MachineOperand &Src1 = MI.getOperand(Src1Idx);
+ assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
+ static_cast<int>(Src0Idx) &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
+ static_cast<int>(Src1Idx) &&
+ "inconsistency with findCommutedOpIndices");
- if (isVOP2(MI) || isVOPC(MI)) {
- const MCInstrDesc &InstrDesc = MI.getDesc();
- // For VOP2 and VOPC instructions, any operand type is valid to use for
- // src0. Make sure we can use the src0 as src1.
- //
- // We could be stricter here and only allow commuting if there is a reason
- // to do so. i.e. if both operands are VGPRs there is no real benefit,
- // although MachineCSE attempts to find matches by commuting.
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
- return nullptr;
- }
-
- MachineInstr *CommutedMI = &MI;
- if (!Src1.isReg()) {
- // Allow commuting instructions with Imm operands.
- if (NewMI || !Src1.isImm() || (!isVOP2(MI) && !isVOP3(MI))) {
- return nullptr;
- }
- // Be sure to copy the source modifiers to the right place.
- if (MachineOperand *Src0Mods =
- getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) {
- MachineOperand *Src1Mods =
- getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
-
- int Src0ModsVal = Src0Mods->getImm();
- if (!Src1Mods && Src0ModsVal != 0)
- return nullptr;
-
- // XXX - This assert might be a lie. It might be useful to have a neg
- // modifier with 0.0.
- int Src1ModsVal = Src1Mods->getImm();
- assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ MachineOperand &Src1 = MI.getOperand(Src1Idx);
- Src1Mods->setImm(Src0ModsVal);
- Src0Mods->setImm(Src1ModsVal);
+ MachineInstr *CommutedMI = nullptr;
+ if (Src0.isReg() && Src1.isReg()) {
+ if (isOperandLegal(MI, Src1Idx, &Src0)) {
+ // Be sure to copy the source modifiers to the right place.
+ CommutedMI
+ = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
}
- unsigned Reg = Src0.getReg();
- unsigned SubReg = Src0.getSubReg();
- if (Src1.isImm())
- Src0.ChangeToImmediate(Src1.getImm());
- else
- llvm_unreachable("Should only have immediates");
-
- Src1.ChangeToRegister(Reg, false);
- Src1.setSubReg(SubReg);
+ } else if (Src0.isReg() && !Src1.isReg()) {
+ // src0 should always be able to support any operand type, so no need to
+ // check operand legality.
+ CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
+ } else if (!Src0.isReg() && Src1.isReg()) {
+ if (isOperandLegal(MI, Src1Idx, &Src0))
+ CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
} else {
- CommutedMI =
- TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
+ // FIXME: Found two non registers to commute. This does happen.
+ return nullptr;
}
- if (CommutedMI)
+
+ if (CommutedMI) {
+ swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
+ Src1, AMDGPU::OpName::src1_modifiers);
+
CommutedMI->setDesc(get(CommutedOpcode));
+ }
return CommutedMI;
}
@@ -1002,8 +998,7 @@ MachineInstr *SIInstrInfo::commuteInstru
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
unsigned &SrcOpIdx1) const {
- const MCInstrDesc &MCID = MI.getDesc();
- if (!MCID.isCommutable())
+ if (!MI.isCommutable())
return false;
unsigned Opc = MI.getOpcode();
@@ -1011,31 +1006,10 @@ bool SIInstrInfo::findCommutedOpIndices(
if (Src0Idx == -1)
return false;
- // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
- // immediate. Also, immediate src0 operand is not handled in
- // SIInstrInfo::commuteInstruction();
- if (!MI.getOperand(Src0Idx).isReg())
- return false;
-
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
return false;
- MachineOperand &Src1 = MI.getOperand(Src1Idx);
- if (Src1.isImm()) {
- // SIInstrInfo::commuteInstruction() does support commuting the immediate
- // operand src1 in 2 and 3 operand instructions.
- if (!isVOP2(MI.getOpcode()) && !isVOP3(MI.getOpcode()))
- return false;
- } else if (Src1.isReg()) {
- // If any source modifiers are set, the generic instruction commuting won't
- // understand how to copy the source modifiers.
- if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))
- return false;
- } else
- return false;
-
return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Thu Sep 8 12:19:29 2016
@@ -86,6 +86,10 @@ private:
unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
protected:
+ bool swapSourceModifiers(MachineInstr &MI,
+ MachineOperand &Src0, unsigned Src0OpName,
+ MachineOperand &Src1, unsigned Src1OpName) const;
+
MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx0,
unsigned OpIdx1) const override;
@@ -144,7 +148,12 @@ public:
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
LLVM_READONLY
- int commuteOpcode(const MachineInstr &MI) const;
+ int commuteOpcode(unsigned Opc) const;
+
+ LLVM_READONLY
+ inline int commuteOpcode(const MachineInstr &MI) const {
+ return commuteOpcode(MI.getOpcode());
+ }
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
Modified: llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll Thu Sep 8 12:19:29 2016
@@ -11,7 +11,7 @@
; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]]
+; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
@@ -34,7 +34,7 @@ define void @use_group_to_flat_addrspace
; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]]
+; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
@@ -79,7 +79,7 @@ define void @use_constant_to_flat_addrsp
; HSA: enable_sgpr_queue_ptr = 0
; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
-; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
+; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
@@ -96,7 +96,7 @@ define void @use_flat_to_group_addrspace
; HSA: enable_sgpr_queue_ptr = 0
; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
-; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
+; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll Thu Sep 8 12:19:29 2016
@@ -5,7 +5,7 @@ declare i1 @llvm.amdgcn.class.f32(float,
; Produces error after adding an implicit def to v_cndmask_b32
; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
-; GCN: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
+; GCN: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN: v_cndmask_b32_e64 v1, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/commute_modifiers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/commute_modifiers.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/commute_modifiers.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/commute_modifiers.ll Thu Sep 8 12:19:29 2016
@@ -6,7 +6,7 @@ declare float @llvm.fma.f32(float, float
; FUNC-LABEL: @commute_add_imm_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, 2.0
; SI: buffer_store_dword [[REG]]
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -20,7 +20,7 @@ define void @commute_add_imm_fabs_f32(fl
; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -4.0
; SI: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -51,7 +51,7 @@ define void @commute_mul_imm_fneg_f32(fl
; FUNC-LABEL: @commute_add_lit_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
-; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[K]], |[[X]]|
; SI: buffer_store_dword [[REG]]
define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll Thu Sep 8 12:19:29 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@@ -19,7 +19,7 @@ declare i32 @llvm.r600.read.tidig.x() no
; FUNC-LABEL: {{^}}s_ctlz_i32:
; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
-; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
; SI-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
; SI: buffer_store_dword [[RESULT]]
@@ -112,7 +112,7 @@ define void @v_ctlz_i8(i8 addrspace(1)*
; FUNC-LABEL: {{^}}s_ctlz_i64:
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
+; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ctlz_zero_undef.ll Thu Sep 8 12:19:29 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@@ -92,7 +92,7 @@ define void @v_ctlz_zero_undef_i8(i8 add
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
+; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll Thu Sep 8 12:19:29 2016
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.fabs.f32(float) #1
@@ -18,7 +18,7 @@ define void @cvt_flr_i32_f32_0(i32 addrs
}
; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
-; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}}
+; SI: v_add_f32_e64 [[TMP:v[0-9]+]], s{{[0-9]+}}, 1.0
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
; SI: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/fabs.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fabs.f64.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fabs.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fabs.f64.ll Thu Sep 8 12:19:29 2016
@@ -55,7 +55,7 @@ define void @fabs_v4f64(<4 x double> add
; SI-LABEL: {{^}}fabs_fold_f64:
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|
; SI: s_endpgm
define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @llvm.fabs.f64(double %in0)
@@ -67,7 +67,7 @@ define void @fabs_fold_f64(double addrsp
; SI-LABEL: {{^}}fabs_fn_fold_f64:
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|
; SI: s_endpgm
define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @fabs(double %in0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fabs.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fabs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fabs.ll Thu Sep 8 12:19:29 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
@@ -75,7 +75,7 @@ define void @fabs_v4f32(<4 x float> addr
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |[[ABS_VALUE]]|
define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @fabs(float %in0)
%fmul = fmul float %fabs, %in1
@@ -87,7 +87,7 @@ define void @fabs_fn_fold(float addrspac
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |[[ABS_VALUE]]|
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @llvm.fabs.f32(float %in0)
%fmul = fmul float %fabs, %in1
Modified: llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll Thu Sep 8 12:19:29 2016
@@ -25,7 +25,7 @@ declare <16 x double> @llvm.ceil.v16f64(
; SI-DAG: cmp_lt_i32
; SI-DAG: cndmask_b32
; SI-DAG: cndmask_b32
-; SI-DAG: v_cmp_lt_f64
+; SI-DAG: v_cmp_gt_f64
; SI-DAG: v_cmp_lg_f64
; SI-DAG: v_cndmask_b32
; SI: v_cndmask_b32
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmaxnum.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmaxnum.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmaxnum.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmaxnum.ll Thu Sep 8 12:19:29 2016
@@ -234,7 +234,7 @@ define void @constant_fold_fmax_f32_n0_n
}
; FUNC-LABEL: @fmax_var_immediate_f32
-; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
@@ -246,7 +246,7 @@ define void @fmax_var_immediate_f32(floa
}
; FUNC-LABEL: @fmax_immediate_var_f32
-; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/fminnum.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fminnum.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fminnum.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fminnum.ll Thu Sep 8 12:19:29 2016
@@ -233,7 +233,7 @@ define void @constant_fold_fmin_f32_n0_n
}
; FUNC-LABEL: @fmin_var_immediate_f32
-; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
@@ -244,7 +244,7 @@ define void @fmin_var_immediate_f32(floa
}
; FUNC-LABEL: @fmin_immediate_var_f32
-; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll Thu Sep 8 12:19:29 2016
@@ -72,7 +72,7 @@ define void @multiple_use_fadd_multi_fma
}
; GCN-LABEL: {{^}}fmul_x2_xn2:
-; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], -4.0, [[X:s[0-9]+]]
+; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
; GCN: buffer_store_dword [[RESULT]]
define void @fmul_x2_xn2(float addrspace(1)* %out, float %x, float %y) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f64.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f64.ll Thu Sep 8 12:19:29 2016
@@ -1,11 +1,11 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
; FIXME: Check something here. Currently it seems fabs + fneg aren't
; into 2 modifiers, although theoretically that should work.
; GCN-LABEL: {{^}}fneg_fabs_fadd_f64:
-; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
+; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|, {{s\[[0-9]+:[0-9]+\]}}
define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
@@ -25,7 +25,7 @@ define void @v_fneg_fabs_fadd_f64(double
}
; GCN-LABEL: {{^}}fneg_fabs_fmul_f64:
-; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
+; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|, {{s\[[0-9]+:[0-9]+\]}}
define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.ll Thu Sep 8 12:19:29 2016
@@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
; SI-NOT: and
-; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+; SI: v_subrev_f32_e64 {{v[0-9]+}}, |{{v[0-9]+}}|, {{s[0-9]+}}
define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
@@ -15,7 +15,7 @@ define void @fneg_fabs_fadd_f32(float ad
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
; SI-NOT: and
-; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
+; SI: v_mul_f32_e64 {{v[0-9]+}}, -|{{v[0-9]+}}|, {{s[0-9]+}}
; SI-NOT: and
define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
Modified: llvm/trunk/test/CodeGen/AMDGPU/fsub64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fsub64.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fsub64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fsub64.ll Thu Sep 8 12:19:29 2016
@@ -39,7 +39,7 @@ define void @fsub_fabs_inv_f64(double ad
}
; SI-LABEL: {{^}}s_fsub_f64:
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\]}}
define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
%sub = fsub double %a, %b
store double %sub, double addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/imm.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/imm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/imm.ll Thu Sep 8 12:19:29 2016
@@ -128,7 +128,7 @@ define void @store_literal_imm_f32(float
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0.0
@@ -138,7 +138,7 @@ define void @add_inline_imm_0.0_f32(floa
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0.5
@@ -148,7 +148,7 @@ define void @add_inline_imm_0.5_f32(floa
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -0.5
@@ -158,7 +158,7 @@ define void @add_inline_imm_neg_0.5_f32(
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 1.0
@@ -168,7 +168,7 @@ define void @add_inline_imm_1.0_f32(floa
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -1.0
@@ -178,7 +178,7 @@ define void @add_inline_imm_neg_1.0_f32(
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 2.0
@@ -188,7 +188,7 @@ define void @add_inline_imm_2.0_f32(floa
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -2.0
@@ -198,7 +198,7 @@ define void @add_inline_imm_neg_2.0_f32(
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 4.0
@@ -208,7 +208,7 @@ define void @add_inline_imm_4.0_f32(floa
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -4.0
@@ -240,7 +240,7 @@ define void @commute_add_literal_f32(flo
; CHECK-LABEL: {{^}}add_inline_imm_1_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x36a0000000000000
@@ -250,7 +250,7 @@ define void @add_inline_imm_1_f32(float
; CHECK-LABEL: {{^}}add_inline_imm_2_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x36b0000000000000
@@ -260,7 +260,7 @@ define void @add_inline_imm_2_f32(float
; CHECK-LABEL: {{^}}add_inline_imm_16_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 16
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x36e0000000000000
@@ -270,7 +270,7 @@ define void @add_inline_imm_16_f32(float
; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -1{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0xffffffffe0000000
@@ -280,7 +280,7 @@ define void @add_inline_imm_neg_1_f32(fl
; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -2{{$}}
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0xffffffffc0000000
@@ -290,7 +290,7 @@ define void @add_inline_imm_neg_2_f32(fl
; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -16
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0xfffffffe00000000
@@ -300,7 +300,7 @@ define void @add_inline_imm_neg_16_f32(f
; CHECK-LABEL: {{^}}add_inline_imm_63_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 63
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x36ff800000000000
@@ -310,7 +310,7 @@ define void @add_inline_imm_63_f32(float
; CHECK-LABEL: {{^}}add_inline_imm_64_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 64
; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x3700000000000000
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll Thu Sep 8 12:19:29 2016
@@ -7,7 +7,7 @@ declare float @llvm.AMDGPU.clamp.f32(flo
; FUNC-LABEL: {{^}}clamp_0_1_f32:
; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0 clamp{{$}}
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
@@ -20,7 +20,7 @@ define void @clamp_0_1_f32(float addrspa
; FUNC-LABEL: {{^}}clamp_fabs_0_1_f32:
; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, |[[ARG]]| clamp{{$}}
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], |[[ARG]]|, 0 clamp{{$}}
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @clamp_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
@@ -32,7 +32,7 @@ define void @clamp_fabs_0_1_f32(float ad
; FUNC-LABEL: {{^}}clamp_fneg_0_1_f32:
; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -[[ARG]] clamp{{$}}
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], -[[ARG]], 0 clamp{{$}}
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @clamp_fneg_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
@@ -44,7 +44,7 @@ define void @clamp_fneg_0_1_f32(float ad
; FUNC-LABEL: {{^}}clamp_fneg_fabs_0_1_f32:
; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -|[[ARG]]| clamp{{$}}
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], -|[[ARG]]|, 0 clamp{{$}}
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @clamp_fneg_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll Thu Sep 8 12:19:29 2016
@@ -77,7 +77,7 @@ define void @test_div_fmas_f64(double ad
}
; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
-; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
+; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
%cmp = icmp eq i32 %i, 0
@@ -110,7 +110,7 @@ define void @test_div_fmas_f32_imm_true_
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: v_cmp_eq_i32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}}
-; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
+; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]]
; SI: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll Thu Sep 8 12:19:29 2016
@@ -6,7 +6,7 @@ declare i64 @llvm.amdgcn.fcmp.f64(double
declare float @llvm.fabs.f32(float) #0
; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
-; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{v[0-9]+}}|, {{s[0-9]+}}
define void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
%temp = call float @llvm.fabs.f32(float %a)
%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
@@ -15,7 +15,7 @@ define void @v_fcmp_f32_oeq_with_fabs(i6
}
; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs:
-; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
+; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{v[0-9]+}}|, |{{s[0-9]+}}|
define void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
%temp = call float @llvm.fabs.f32(float %a)
%src_input = call float @llvm.fabs.f32(float %src)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll Thu Sep 8 12:19:29 2016
@@ -27,7 +27,7 @@ define void @test_mul_legacy_undef1_f32(
}
; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
-; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |v{{[0-9]+}}|
+; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |s{{[0-9]+}}|
define void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 {
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll Thu Sep 8 12:19:29 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
@@ -9,7 +9,7 @@
; SI-DAG: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
-; SI: v_cmp_le_f32_e64 vcc, 0.5, |[[SUB]]|
+; SI: v_cmp_ge_f32_e64 vcc, |[[SUB]]|, 0.5
; SI: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[VX]]
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
; SI: buffer_store_dword [[RESULT]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/madak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madak.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madak.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madak.ll Thu Sep 8 12:19:29 2016
@@ -151,7 +151,7 @@ define void @s_s_madak_f32(float addrspa
; GCN-LABEL: {{^}}no_madak_src0_modifier_f32:
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
+; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
; GCN: s_endpgm
define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -173,7 +173,7 @@ define void @no_madak_src0_modifier_f32(
; GCN-LABEL: {{^}}no_madak_src1_modifier_f32:
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
+; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
; GCN: s_endpgm
define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
Modified: llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madmk.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madmk.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madmk.ll Thu Sep 8 12:19:29 2016
@@ -128,7 +128,8 @@ define void @scalar_vector_madmk_f32(flo
; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
+; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], |[[VA]]|, [[VB]]
define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll Thu Sep 8 12:19:29 2016
@@ -58,7 +58,7 @@ define void @s_select_v4i32(<4 x i32> ad
; FUNC-LABEL: {{^}}v_select_v4i32:
; SI: buffer_load_dwordx4
-; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
+; SI: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
@@ -96,7 +96,7 @@ define void @select_v8i32(<8 x i32> addr
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
-; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
+; SI-DAG: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; SI: v_cndmask_b32_e32
; SI: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
@@ -112,7 +112,7 @@ define void @s_select_v2f32(<2 x float>
; FUNC-LABEL: {{^}}s_select_v4f32:
; SI: s_load_dwordx4
; SI: s_load_dwordx4
-; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
+; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
@@ -129,7 +129,7 @@ define void @s_select_v4f32(<4 x float>
; FUNC-LABEL: {{^}}v_select_v4f32:
; SI: buffer_load_dwordx4
-; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
+; SI: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll Thu Sep 8 12:19:29 2016
@@ -178,7 +178,7 @@ define void @cmp_sext_k_neg1(i1 addrspac
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
; GCN: s_load_dword [[B:s[0-9]+]]
-; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -1, [[B]]
+; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc.ll Thu Sep 8 12:19:29 2016
@@ -391,8 +391,8 @@ endif:
}
; FUNC-LABEL: setcc-i1-and-xor
-; SI-DAG: v_cmp_le_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
-; SI-DAG: v_cmp_ge_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], 1.0, s{{[0-9]+}}
+; SI-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
+; SI-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; SI: s_and_b64 s[2:3], [[A]], [[B]]
define void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
bb0:
Modified: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll Thu Sep 8 12:19:29 2016
@@ -302,7 +302,7 @@ end:
}
; CHECK-LABEL: {{^}}no_skip_no_successors:
-; CHECK: v_cmp_nle_f32
+; CHECK: v_cmp_nge_f32
; CHECK: s_and_b64 vcc, exec,
; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll Thu Sep 8 12:19:29 2016
@@ -4,7 +4,7 @@
; GCN-FUNC: {{^}}vccz_workaround:
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
-; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0
+; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN: s_and_b64 vcc, exec, [[MASK]]
; GCN: s_waitcnt lgkmcnt(0)
; VCCZ-BUG: s_mov_b64 vcc, vcc
Modified: llvm/trunk/test/CodeGen/AMDGPU/trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/trunc.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/trunc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/trunc.ll Thu Sep 8 12:19:29 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -73,7 +73,7 @@ define void @sgpr_trunc_i32_to_i1(i32 ad
; SI-LABEL: {{^}}s_trunc_i64_to_i1:
; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]]
-; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], 1, [[MASKED]]
+; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], [[MASKED]], 1{{$}}
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
%trunc = trunc i64 %x to i1
Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll?rev=280970&r1=280969&r2=280970&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll Thu Sep 8 12:19:29 2016
@@ -31,7 +31,7 @@ done:
; SI-LABEL: {{^}}uniform_if_vcc:
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
; also scheduled the write first.
-; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
+; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; SI-DAG: s_and_b64 vcc, exec, [[COND]]
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
@@ -88,7 +88,7 @@ done:
; SI-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
; also scheduled the write first.
-; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
+; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; SI-DAG: s_and_b64 vcc, exec, [[COND]]
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
@@ -253,7 +253,7 @@ ENDIF:
; SI: s_load_dword [[COND:s[0-9]+]]
; SI: s_cmp_lt_i32 [[COND]], 1
; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
-; SI: v_cmp_lt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0, [[COND]]
+; SI: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
; SI: s_and_b64 vcc, exec, [[MASK]]
; SI: s_cbranch_vccnz [[EXIT]]
; SI: buffer_store