[llvm] r307575 - AMDGPU: Allow SIShrinkInstructions to work in non-SSA
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 10 12:53:58 PDT 2017
Author: arsenm
Date: Mon Jul 10 12:53:57 2017
New Revision: 307575
URL: http://llvm.org/viewvc/llvm-project?rev=307575&view=rev
Log:
AMDGPU: Allow SIShrinkInstructions to work in non-SSA
Immediates can be folded as long as the immediate is a vreg.
Also undo commuting instructions if it didn't fold an immediate.
Added:
llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/trunk/test/CodeGen/AMDGPU/add.i16.ll
llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll
llvm/trunk/test/CodeGen/AMDGPU/ds_read2.ll
llvm/trunk/test/CodeGen/AMDGPU/ds_read2_superreg.ll
llvm/trunk/test/CodeGen/AMDGPU/ds_read2st64.ll
llvm/trunk/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
llvm/trunk/test/CodeGen/AMDGPU/fabs.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
llvm/trunk/test/CodeGen/AMDGPU/fadd.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll
llvm/trunk/test/CodeGen/AMDGPU/fma-combine.ll
llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.ll
llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll
llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll
llvm/trunk/test/CodeGen/AMDGPU/fmul.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f32.ll
llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll
llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fneg.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fpext.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fptosi.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fptoui.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fptrunc.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fract.ll
llvm/trunk/test/CodeGen/AMDGPU/frem.ll
llvm/trunk/test/CodeGen/AMDGPU/fsub.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fsub.ll
llvm/trunk/test/CodeGen/AMDGPU/half.ll
llvm/trunk/test/CodeGen/AMDGPU/immv216.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.floor.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.fma.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.log2.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.rint.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll
llvm/trunk/test/CodeGen/AMDGPU/madak.ll
llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
llvm/trunk/test/CodeGen/AMDGPU/rsq.ll
llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll
llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll
llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll
llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll
llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll
llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll
llvm/trunk/test/CodeGen/AMDGPU/usubo.ll
llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll
llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll
llvm/trunk/test/CodeGen/AMDGPU/xor.ll
llvm/trunk/test/CodeGen/AMDGPU/zext-i64-bit-operand.ll
llvm/trunk/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Mon Jul 10 12:53:57 2017
@@ -126,45 +126,54 @@ static bool canShrink(MachineInstr &MI,
/// \brief This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
-/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
-/// and will only fold literal constants if we are still in SSA.
-static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
+static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
MachineRegisterInfo &MRI, bool TryToCommute = true) {
-
- if (!MRI.isSSA())
- return;
-
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
// Try to fold Src0
MachineOperand &Src0 = MI.getOperand(Src0Idx);
- if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
+ if (Src0.isReg()) {
unsigned Reg = Src0.getReg();
- MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
- if (Def && Def->isMoveImmediate()) {
- MachineOperand &MovSrc = Def->getOperand(1);
- bool ConstantFolded = false;
-
- if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
- isUInt<32>(MovSrc.getImm()))) {
- Src0.ChangeToImmediate(MovSrc.getImm());
- ConstantFolded = true;
- }
- if (ConstantFolded) {
- if (MRI.use_empty(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
+ MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+ if (Def && Def->isMoveImmediate()) {
+ MachineOperand &MovSrc = Def->getOperand(1);
+ bool ConstantFolded = false;
+
+ if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
+ isUInt<32>(MovSrc.getImm()))) {
+ // It's possible to have only one component of a super-reg defined by
+ // a single mov, so we need to clear any subregister flag.
+ Src0.setSubReg(0);
+ Src0.ChangeToImmediate(MovSrc.getImm());
+ ConstantFolded = true;
+ }
+
+ if (ConstantFolded) {
+ assert(MRI.use_empty(Reg));
Def->eraseFromParent();
- ++NumLiteralConstantsFolded;
- return;
+ ++NumLiteralConstantsFolded;
+ return true;
+ }
}
}
}
// We have failed to fold src0, so commute the instruction and try again.
- if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(MI))
- foldImmediates(MI, TII, MRI, false);
+ if (TryToCommute && MI.isCommutable()) {
+ if (TII->commuteInstruction(MI)) {
+ if (foldImmediates(MI, TII, MRI, false))
+ return true;
+
+ // Commute back.
+ TII->commuteInstruction(MI);
+ }
+ }
+ return false;
}
// Copy MachineOperand with all flags except setting it as implicit.
Modified: llvm/trunk/test/CodeGen/AMDGPU/add.i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/add.i16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/add.i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/add.i16.ll Mon Jul 10 12:53:57 2017
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}v_test_add_i16:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_short [[ADD]]
define amdgpu_kernel void @v_test_add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -67,7 +67,7 @@ define amdgpu_kernel void @v_test_add_i1
; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i32:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -86,7 +86,7 @@ define amdgpu_kernel void @v_test_add_i1
; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i64:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
+; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:{{[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -105,7 +105,7 @@ define amdgpu_kernel void @v_test_add_i1
; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i32:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: buffer_store_dword [[SEXT]]
define amdgpu_kernel void @v_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
@@ -125,7 +125,7 @@ define amdgpu_kernel void @v_test_add_i1
; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i64:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/add.v2i16.ll Mon Jul 10 12:53:57 2017
@@ -168,10 +168,10 @@ define amdgpu_kernel void @v_test_add_v2
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
-; VI: v_add_u16_e32 v[[ADD_HI:[0-9]+]], v[[B_HI]], v[[A_HI]]
+; VI: v_add_u16_e32 v[[ADD_HI:[0-9]+]], v[[A_HI]], v[[B_HI]]
; VI-NOT: and
; VI-NOT: shl
-; VI: v_add_u16_e32 v[[ADD_LO:[0-9]+]], v[[B_LO]], v[[A_LO]]
+; VI: v_add_u16_e32 v[[ADD_LO:[0-9]+]], v[[A_LO]], v[[B_LO]]
; VI-NOT: and
; VI-NOT: shl
; VI: buffer_store_dwordx2 v{{\[}}[[ADD_LO]]:[[ADD_HI]]{{\]}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll Mon Jul 10 12:53:57 2017
@@ -107,7 +107,7 @@ define amdgpu_kernel void @fold_mi_v_not
; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
-; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
+; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]], v[[VREG1_LO]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ctlz.ll Mon Jul 10 12:53:57 2017
@@ -154,7 +154,7 @@ define amdgpu_kernel void @s_ctlz_i64_tr
; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], vcc
-; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
+; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
; GCN-DAG: v_cmp_ne_u32_e32 vcc, 0, [[OR]]
; GCN-DAG: v_cndmask_b32_e32 v[[CLTZ_LO:[0-9]+]], 64, v[[CTLZ:[0-9]+]], vcc
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI:[0-9]+]]{{\]}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/ds_read2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ds_read2.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ds_read2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ds_read2.ll Mon Jul 10 12:53:57 2017
@@ -9,7 +9,7 @@
; SI-LABEL: @simple_read2_f32
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8
; SI: s_waitcnt lgkmcnt(0)
-; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 {
@@ -28,7 +28,7 @@ define amdgpu_kernel void @simple_read2_
; SI-LABEL: @simple_read2_f32_max_offset
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255
; SI: s_waitcnt lgkmcnt(0)
-; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/ds_read2_superreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ds_read2_superreg.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ds_read2_superreg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ds_read2_superreg.ll Mon Jul 10 12:53:57 2017
@@ -38,9 +38,9 @@ define amdgpu_kernel void @simple_read2_
; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align4:
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Z:[0-9]+]]:[[REG_W:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
-; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_Z]], v[[REG_X]]
-; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_W]], v[[REG_Y]]
-; CI: v_add_f32_e32 v[[ADD2:[0-9]+]], v[[ADD1]], v[[ADD0]]
+; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_X]], v[[REG_Z]]
+; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_Y]], v[[REG_W]]
+; CI: v_add_f32_e32 v[[ADD2:[0-9]+]], v[[ADD0]], v[[ADD1]]
; CI: buffer_store_dword v[[ADD2]]
; CI: s_endpgm
define amdgpu_kernel void @simple_read2_v4f32_superreg_align4(float addrspace(1)* %out) #0 {
@@ -64,8 +64,8 @@ define amdgpu_kernel void @simple_read2_
; CI-LABEL: {{^}}simple_read2_v3f32_superreg_align4:
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read_b32 v[[REG_Z:[0-9]+]], v{{[0-9]+}} offset:8{{$}}
-; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_Z]], v[[REG_X]]
-; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_Y]], v[[ADD0]]
+; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_X]], v[[REG_Z]]
+; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[ADD0]], v[[REG_Y]]
; CI: buffer_store_dword v[[ADD1]]
; CI: s_endpgm
define amdgpu_kernel void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/ds_read2st64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ds_read2st64.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ds_read2st64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ds_read2st64.ll Mon Jul 10 12:53:57 2017
@@ -7,7 +7,7 @@
; SI-LABEL: @simple_read2st64_f32_0_1
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
-; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define amdgpu_kernel void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
@@ -26,7 +26,7 @@ define amdgpu_kernel void @simple_read2s
; SI-LABEL: @simple_read2st64_f32_1_2
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
-; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define amdgpu_kernel void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
@@ -46,7 +46,7 @@ define amdgpu_kernel void @simple_read2s
; SI-LABEL: @simple_read2st64_f32_max_offset
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
; SI: s_waitcnt lgkmcnt(0)
-; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define amdgpu_kernel void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll Mon Jul 10 12:53:57 2017
@@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; Test that the -enable-no-signed-zeros-fp-math flag works
; GCN-LABEL: {{^}}fneg_fsub_f32:
-; GCN: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
; GCN-UNSAFE-NOT: xor
Modified: llvm/trunk/test/CodeGen/AMDGPU/fabs.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fabs.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fabs.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fabs.f16.ll Mon Jul 10 12:53:57 2017
@@ -39,9 +39,9 @@ define amdgpu_kernel void @s_fabs_f16(ha
; VI: flat_load_ushort [[HI:v[0-9]+]]
; VI: flat_load_ushort [[LO:v[0-9]+]]
; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
-; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[MASK]], [[HI]]
+; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[HI]], [[MASK]]
; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[LO]], [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, [[FABS_HI]], [[FABS_LO]]
+; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, [[FABS_LO]], [[FABS_HI]]
; VI: flat_store_dword
; GFX9: s_load_dword [[VAL:s[0-9]+]]
@@ -62,8 +62,8 @@ define amdgpu_kernel void @s_fabs_v2f16(
; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
-; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]]
+; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]]
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -80,7 +80,7 @@ define amdgpu_kernel void @s_fabs_v4f16(
; CI-DAG: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[IN0]]
; CI-DAG: v_cvt_f32_f16_e64 [[ABS_CVT1:v[0-9]+]], |[[IN1]]|
-; CI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[CVT0]], [[ABS_CVT1]]
+; CI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[ABS_CVT1]], [[CVT0]]
; CI: v_cvt_f16_f32_e32 [[CVTRESULT:v[0-9]+]], [[RESULT]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVTRESULT]]
@@ -135,7 +135,7 @@ define amdgpu_kernel void @fabs_free_v2f
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[FABS]], v{{[0-9]+$}}
define amdgpu_kernel void @v_fabs_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
+ %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
%fmul = fmul <2 x half> %fabs, %val
Modified: llvm/trunk/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll Mon Jul 10 12:53:57 2017
@@ -16,8 +16,8 @@
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]
-; GCN-FLUSH: v_mac_f32_e32 [[Z]], [[V]], [[U]]
-; GCN-FLUSH-NEXT: v_mac_f32_e32 [[Z]], [[Y]], [[X]]
+; GCN-FLUSH: v_mac_f32_e32 [[Z]], [[U]], [[V]]
+; GCN-FLUSH-NEXT: v_mac_f32_e32 [[Z]], [[X]], [[Y]]
; GCN-FLUSH-NEXT: buffer_store_dword [[Z]]
; GCN-FASTFMA: v_fma_f32 [[FMA0:v[0-9]+]], [[U]], [[V]], [[Z]]
@@ -49,7 +49,7 @@ define amdgpu_kernel void @fast_add_fmul
; GCN: buffer_load_dword [[V:v[0-9]+]]
; GCN-FLUSH: v_mad_f32 [[TMP:v[0-9]]], [[U]], [[V]], -[[Z]]
-; GCN-FLUSH-NEXT: v_mac_f32_e32 [[TMP]], [[Y]], [[X]]
+; GCN-FLUSH-NEXT: v_mac_f32_e32 [[TMP]], [[X]], [[Y]]
; GCN-FLUSH-NEXT: buffer_store_dword [[Z]]
; GCN-FASTFMA: v_fma_f32 [[FMA0:v[0-9]+]], [[U]], [[V]], -[[Z]]
@@ -75,13 +75,13 @@ define amdgpu_kernel void @fast_sub_fmul
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]
-; GCN-FLUSH-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[V]], [[U]]
-; GCN-FLUSH-DAG: v_mac_f32_e32 [[MUL]], [[Y]], [[X]]
-; GCN-FLUSH: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[U]]
+; GCN-FLUSH-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
+; GCN-FLUSH-DAG: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
+; GCN-FLUSH: v_add_f32_e32 v{{[0-9]+}}, [[U]], [[Z]]
-; GCN-FASTFMA: v_mul_f32_e32 [[MUL:v[0-9]+]], [[V]], [[U]]
+; GCN-FASTFMA: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[MUL]]
-; GCN-FASTFMA: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[FMA1]]
+; GCN-FASTFMA: v_add_f32_e32 v{{[0-9]+}}, [[FMA1]], [[Z]]
; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_mul_f32_e32
@@ -108,13 +108,13 @@ define amdgpu_kernel void @fast_add_fmul
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]
-; GCN-FLUSH-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[V]], [[U]]
-; GCN-FLUSH-DAG: v_mac_f32_e32 [[MUL]], [[Y]], [[X]]
-; GCN-FLUSH: v_add_f32_e32 v{{[0-9]+}}, [[U]], [[Z]]
+; GCN-FLUSH-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
+; GCN-FLUSH-DAG: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
+; GCN-FLUSH: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[U]]
-; GCN-FASTFMA: v_mul_f32_e32 [[MUL:v[0-9]+]], [[V]], [[U]]
+; GCN-FASTFMA: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[MUL]]
-; GCN-FASTFMA: v_add_f32_e32 v{{[0-9]+}}, [[FMA1]], [[Z]]
+; GCN-FASTFMA: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[FMA1]]
; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_mul_f32_e32
@@ -191,17 +191,17 @@ define amdgpu_kernel void @fast_add_fmul
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[V]], [[U]]
+; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
-; GCN-FLUSH: v_mad_f32 [[MAD:v[0-9]+]], [[Y]], [[X]], [[MUL]]
-; GCN-FLUSH: v_subrev_f32_e32 [[SUB:v[0-9]+]], [[Z]], [[MAD]]
+; GCN-FLUSH: v_mad_f32 [[MAD:v[0-9]+]], [[X]], [[Y]], [[MUL]]
+; GCN-FLUSH: v_sub_f32_e32 [[SUB:v[0-9]+]], [[MAD]], [[Z]]
; GCN-FASTFMA: v_fma_f32 [[MAD:v[0-9]+]], [[X]], [[Y]], [[MUL]]
-; GCN-FASTFMA: v_subrev_f32_e32 [[SUB:v[0-9]+]], [[Z]], [[MAD]]
+; GCN-FASTFMA: v_sub_f32_e32 [[SUB:v[0-9]+]], [[MAD]], [[Z]]
-; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[Y]], [[X]]
+; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[X]], [[Y]]
; GCN-SLOWFMA: v_add_f32_e32
-; GCN-SLOWFMA: v_subrev_f32_e32 [[MAD:v[0-9]+]]
+; GCN-SLOWFMA: v_sub_f32_e32 [[MAD:v[0-9]+]]
; GCN: buffer_store_dword [[MUL]]
; GCN: buffer_store_dword [[MAD]]
@@ -226,21 +226,21 @@ define amdgpu_kernel void @fast_sub_fmul
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[V]], [[U]]
+; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
-; GCN-FLUSH-NEXT: v_mac_f32_e32 [[MUL]], [[Y]], [[X]]
-; GCN-FLUSH-NEXT: v_subrev_f32_e32 [[SUB:v[0-9]+]], [[Z]], [[MUL]]
+; GCN-FLUSH-NEXT: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
+; GCN-FLUSH-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[MUL]], [[Z]]
; GCN-FLUSH-NEXT: buffer_store_dword [[MUL]]
; GCN-FLUSH-NEXT: buffer_store_dword [[SUB]]
; GCN-FASTFMA-NEXT: v_fma_f32 [[FMA:v[0-9]+]], [[X]], [[Y]], [[U]]
-; GCN-FASTFMA-NEXT: v_subrev_f32_e32 [[SUB:v[0-9]+]], [[Z]], [[FMA]]
+; GCN-FASTFMA-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[FMA]], [[Z]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[FMA]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[SUB]]
-; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[Y]], [[X]]
+; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[X]], [[Y]]
; GCN-SLOWFMA: v_add_f32_e32
-; GCN-SLOWFMA: v_subrev_f32_e32
+; GCN-SLOWFMA: v_sub_f32_e32
define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd() #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
Modified: llvm/trunk/test/CodeGen/AMDGPU/fadd.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fadd.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fadd.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fadd.f16.ll Mon Jul 10 12:53:57 2017
@@ -6,9 +6,9 @@
; GCN: {{buffer|flat}}_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
+; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]]
+; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fadd_f16(
@@ -70,16 +70,16 @@ entry:
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
-; SI-DAG: v_add_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]]
-; SI-DAG: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
+; SI-DAG: v_add_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]]
+; SI-DAG: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
-; VI-DAG: v_add_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_add_f16_e32 v[[R_F16_LO:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_LO]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -108,12 +108,12 @@ entry:
; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], 2.0, v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_mov_b32_e32 v[[CONST2:[0-9]+]], 0x4000
; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -139,12 +139,12 @@ entry:
; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], 1.0, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[A_V2_F16]]
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll Mon Jul 10 12:53:57 2017
@@ -27,7 +27,7 @@
; VI-DAG: v_cvt_f32_f16_e32 [[CVT_RHS:v[0-9]+]], [[RHS]]
; VI-DAG: v_rcp_f32_e32 [[RCP_RHS:v[0-9]+]], [[CVT_RHS]]
-; VI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[RCP_RHS]], [[CVT_LHS]]
+; VI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[CVT_LHS]], [[RCP_RHS]]
; VI: v_cvt_f16_f32_e32 [[CVT_BACK:v[0-9]+]], [[MUL]]
; VI: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK]], [[RHS]], [[LHS]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -165,7 +165,7 @@ entry:
; VI: flat_load_ushort [[RHS:v[0-9]+]]
; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
-; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]
+; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[LHS]], [[RCP]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fdiv_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #0 {
@@ -187,7 +187,7 @@ entry:
; VI: flat_load_ushort [[RHS:v[0-9]+]]
; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
-; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]
+; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[LHS]], [[RCP]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fdiv_f16_unsafe(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #2 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll Mon Jul 10 12:53:57 2017
@@ -20,7 +20,7 @@
; GCN: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
-; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[B]], [[DEN_SCALE]]
+; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
@@ -45,7 +45,7 @@ entry:
; GCN-NOT: s_setreg
; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
-; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[B]], [[DEN_SCALE]]
+; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/fma-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fma-combine.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fma-combine.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fma-combine.ll Mon Jul 10 12:53:57 2017
@@ -387,7 +387,7 @@ define amdgpu_kernel void @aggressive_co
; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
@@ -403,7 +403,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
@@ -419,7 +419,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
@@ -435,7 +435,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
@@ -451,7 +451,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y:
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
@@ -467,7 +467,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x:
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
@@ -483,7 +483,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y:
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
@@ -499,7 +499,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x:
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
@@ -515,7 +515,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
@@ -531,7 +531,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
@@ -547,7 +547,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
@@ -563,7 +563,7 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone:
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
+; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
;
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
define amdgpu_kernel void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
@@ -583,8 +583,8 @@ define amdgpu_kernel void @test_f32_mul_
; FUNC-LABEL: {{^}}test_f32_interp:
; SI-NOFMA: v_sub_f32_e32 [[VT1:v[0-9]]], 1.0, [[VT:v[0-9]]]
-; SI-NOFMA: v_mul_f32_e32 [[VTY:v[0-9]]], [[VT1]], [[VY:v[0-9]]]
-; SI-NOFMA: v_mac_f32_e32 [[VTY]], [[VT]], [[VX:v[0-9]]]
+; SI-NOFMA: v_mul_f32_e32 [[VTY:v[0-9]]], [[VY:v[0-9]]], [[VT1]]
+; SI-NOFMA: v_mac_f32_e32 [[VTY]], [[VX:v[0-9]]], [[VT]]
;
; SI-FMA: v_fma_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VT]], [[VR]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.ll Mon Jul 10 12:53:57 2017
@@ -10,7 +10,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
-; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
@@ -31,7 +31,7 @@ define amdgpu_kernel void @test_fmax_leg
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
@@ -51,7 +51,7 @@ define amdgpu_kernel void @test_fmax_leg
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
-; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
@@ -71,7 +71,7 @@ define amdgpu_kernel void @test_fmax_leg
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
@@ -91,7 +91,7 @@ define amdgpu_kernel void @test_fmax_leg
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll Mon Jul 10 12:53:57 2017
@@ -872,8 +872,8 @@ define amdgpu_kernel void @v_test_global
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
-; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[B]], [[A]]
-; GCN: v_min_f32_e32 v{{[0-9]+}}, [[C]], [[MAX]]
+; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[A]], [[B]]
+; GCN: v_min_f32_e32 v{{[0-9]+}}, [[MAX]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.ll Mon Jul 10 12:53:57 2017
@@ -45,7 +45,7 @@ define amdgpu_kernel void @s_test_fmin_l
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
-; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -64,7 +64,7 @@ define amdgpu_kernel void @test_fmin_leg
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -83,7 +83,7 @@ define amdgpu_kernel void @test_fmin_leg
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -102,7 +102,7 @@ define amdgpu_kernel void @test_fmin_leg
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
-; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -121,7 +121,7 @@ define amdgpu_kernel void @test_fmin_leg
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
-; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmul.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmul.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmul.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmul.f16.ll Mon Jul 10 12:53:57 2017
@@ -6,9 +6,9 @@
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_mul_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
+; SI: v_mul_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]]
+; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_f16(
@@ -70,16 +70,16 @@ entry:
; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
-; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]]
-; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
+; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]]
+; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
-; VI-DAG: v_mul_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_mul_f16_e32 v[[R_F16_LO:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_LO]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -108,7 +108,7 @@ entry:
; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_v2f16_imm_a(
@@ -134,7 +134,7 @@ entry:
; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_v2f16_imm_b(
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f16.ll Mon Jul 10 12:53:57 2017
@@ -79,7 +79,7 @@ define amdgpu_kernel void @fmuladd_a_2.0
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_a_a_b_f16(half addrspace(1)* %out,
@@ -108,7 +108,7 @@ define amdgpu_kernel void @fadd_a_a_b_f1
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_b_a_a_f16(half addrspace(1)* %out,
@@ -227,8 +227,8 @@ define amdgpu_kernel void @fmuladd_2.0_a
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
-; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
+; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; VI-DENORM-STRICT: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
@@ -257,8 +257,8 @@ define amdgpu_kernel void @mad_sub_f16(h
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
-; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
+; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; VI-DENORM-STRICT: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_inv_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
@@ -287,7 +287,7 @@ define amdgpu_kernel void @mad_sub_inv_f
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
-; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
+; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; VI-DENORM-STRICT: v_sub_f16_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -319,7 +319,7 @@ define amdgpu_kernel void @mad_sub_fabs_
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
-; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
+; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; VI-DENORM-STRICT: v_sub_f16_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -347,13 +347,13 @@ define amdgpu_kernel void @mad_sub_fabs_
; GCN: {{buffer|flat}}_load_ushort [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGC:v[0-9]+]]
-; VI-FLUSH: v_mac_f16_e32 [[REGC]], [[REGB]], [[REGA]]
+; VI-FLUSH: v_mac_f16_e32 [[REGC]], [[REGA]], [[REGB]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REGC]]
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
-; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
+; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @neg_neg_mad_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -385,7 +385,7 @@ define amdgpu_kernel void @neg_neg_mad_f
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
; VI-DENORM-STRICT: v_mul_f16_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
-; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
+; VI-DENORM-STRICT: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_fabs_sub_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
@@ -416,7 +416,7 @@ define amdgpu_kernel void @mad_fabs_sub_
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; VI-DENORM-STRICT: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fsub_c_fadd_a_a_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
@@ -444,7 +444,7 @@ define amdgpu_kernel void @fsub_c_fadd_a
; VI-DENORM-CONTRACT: v_fma_f16 [[R2]], [[R1]], 2.0, -[[R2]]
; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; VI-DENORM-STRICT: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fsub_fadd_a_a_c_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f32.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f32.ll Mon Jul 10 12:53:57 2017
@@ -67,7 +67,7 @@ define amdgpu_kernel void @fmul_fadd_f32
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; SI-DENORM buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -96,7 +96,7 @@ define amdgpu_kernel void @fmuladd_2.0_a
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -125,10 +125,10 @@ define amdgpu_kernel void @fmuladd_a_2.0
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -160,10 +160,10 @@ define amdgpu_kernel void @fadd_a_a_b_f3
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -192,7 +192,7 @@ define amdgpu_kernel void @fadd_b_a_a_f3
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -221,7 +221,7 @@ define amdgpu_kernel void @fmuladd_neg_2
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], -2.0, [[R2]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -252,7 +252,7 @@ define amdgpu_kernel void @fmuladd_neg_2
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -282,7 +282,7 @@ define amdgpu_kernel void @fmuladd_2.0_n
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -310,11 +310,11 @@ define amdgpu_kernel void @fmuladd_2.0_a
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; GCN-DENORM-STRICT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
+; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -345,11 +345,11 @@ define amdgpu_kernel void @mad_sub_f32(f
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; GCN-DENORM-STRICT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
+; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -379,10 +379,10 @@ define amdgpu_kernel void @mad_sub_inv_f
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
+; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
; SI: buffer_store_dword [[RESULT]]
@@ -414,10 +414,10 @@ define amdgpu_kernel void @mad_sub_fabs_
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
+; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
; SI: buffer_store_dword [[RESULT]]
@@ -446,17 +446,17 @@ define amdgpu_kernel void @mad_sub_fabs_
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
-; GCN-FLUSH: v_mac_f32_e32 [[REGC]], [[REGB]], [[REGA]]
+; GCN-FLUSH: v_mac_f32_e32 [[REGC]], [[REGA]], [[REGB]]
; SI-FLUSH: buffer_store_dword [[REGC]]
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REGC]]
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
-; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
+; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
+; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -489,10 +489,10 @@ define amdgpu_kernel void @neg_neg_mad_f
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
-; GCN-DENORM-SLOWFMA-CONTRACT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
; GCN-DENORM-STRICT: v_mul_f32_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
-; GCN-DENORM-STRICT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
+; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -525,10 +525,10 @@ define amdgpu_kernel void @mad_fabs_sub_
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-STRICT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -556,10 +556,10 @@ define amdgpu_kernel void @fsub_c_fadd_a
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-STRICT: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
; SI: buffer_store_dword [[RESULT]]
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll Mon Jul 10 12:53:57 2017
@@ -9,7 +9,7 @@
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
@@ -31,7 +31,7 @@ define amdgpu_kernel void @v_fneg_add_f3
; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[ADD]]
@@ -54,7 +54,7 @@ define amdgpu_kernel void @v_fneg_add_st
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
@@ -82,10 +82,10 @@ define amdgpu_kernel void @v_fneg_add_mu
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-SAFE: v_subrev_f32_e32
+; GCN-SAFE: v_sub_f32_e32
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000,
-; GCN-NSZ: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -106,10 +106,10 @@ define amdgpu_kernel void @v_fneg_add_fn
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-SAFE: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
-; GCN-NSZ: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -133,7 +133,7 @@ define amdgpu_kernel void @v_fneg_add_x_
; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
-; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -157,11 +157,11 @@ define amdgpu_kernel void @v_fneg_add_fn
; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
-; GCN-SAFE: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]
; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
-; GCN-NSZ-DAG: v_subrev_f32_e32 [[NEG_ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
@@ -185,10 +185,10 @@ define amdgpu_kernel void @v_fneg_add_st
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
-; GCN-SAFE-DAG: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
-; GCN-NSZ-DAG: v_subrev_f32_e32 [[NEG_ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NSZ-NEXT: buffer_store_dword [[MUL]]
@@ -235,7 +235,7 @@ define amdgpu_kernel void @v_fneg_mul_f3
; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[ADD]]
@@ -280,7 +280,7 @@ define amdgpu_kernel void @v_fneg_mul_mu
; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -300,7 +300,7 @@ define amdgpu_kernel void @v_fneg_mul_fn
; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -342,7 +342,7 @@ define amdgpu_kernel void @v_fneg_mul_fn
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
-; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[B]], [[A]]
+; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
@@ -364,7 +364,7 @@ define amdgpu_kernel void @v_fneg_mul_st
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[B]], [[A]]
+; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[MUL]]
@@ -974,7 +974,7 @@ define amdgpu_kernel void @v_fneg_fma_mu
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
-; GCN-SAFE: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
@@ -1000,7 +1000,7 @@ define amdgpu_kernel void @v_fneg_fmad_f
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
-; GCN-SAFE: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
@@ -1449,7 +1449,7 @@ define amdgpu_kernel void @v_fneg_mul_le
; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[ADD]]
@@ -1494,7 +1494,7 @@ define amdgpu_kernel void @v_fneg_mul_le
; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1514,7 +1514,7 @@ define amdgpu_kernel void @v_fneg_mul_le
; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1556,7 +1556,7 @@ define amdgpu_kernel void @v_fneg_mul_le
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
-; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[B]], [[A]]
+; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
@@ -1578,7 +1578,7 @@ define amdgpu_kernel void @v_fneg_mul_le
; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[B]], [[A]]
+; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[MUL]]
@@ -1664,7 +1664,7 @@ define amdgpu_kernel void @v_fneg_trunc_
; GCN-LABEL: {{^}}v_fneg_round_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e32
-; GCN: v_subrev_f32_e32
+; GCN: v_sub_f32_e32
; GCN: v_cndmask_b32
; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
@@ -1782,11 +1782,11 @@ define amdgpu_kernel void @v_fneg_interp
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
-; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: s_cbranch_scc1
; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
-; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[C]], [[XOR]]
+; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
; GCN: buffer_store_dword [[MUL1]]
; GCN: buffer_store_dword [[MUL0]]
@@ -1851,7 +1851,7 @@ define amdgpu_kernel void @v_fneg_inline
; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
; GCN: ; use [[NEG]]
; GCN: buffer_store_dword [[MUL]]
@@ -1984,8 +1984,8 @@ define amdgpu_kernel void @multiuse_fneg
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
-; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[C]], [[FMA0]]
-; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[D]], [[FMA0]]
+; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
+; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
; GCN: buffer_store_dword [[MUL1]]
; GCN-NEXT: buffer_store_dword [[MUL2]]
@@ -2084,7 +2084,7 @@ define amdgpu_kernel void @one_use_cost_
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
-; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[D]], [[TRUNC_A]]
+; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
; GCN: buffer_store_dword [[FMA0]]
; GCN: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll Mon Jul 10 12:53:57 2017
@@ -5,7 +5,7 @@
; GCN-LABEL: {{^}}fneg_fabs_fadd_f16:
; CI: v_cvt_f32_f16_e32
; CI: v_cvt_f32_f16_e64 [[CVT_ABS_X:v[0-9]+]], |v{{[0-9]+}}|
-; CI: v_subrev_f32_e32 v{{[0-9]+}}, [[CVT_ABS_X]], v{{[0-9]+}}
+; CI: v_sub_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_ABS_X]]
; GFX89-NOT: _and
; GFX89: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|
@@ -20,7 +20,7 @@ define amdgpu_kernel void @fneg_fabs_fad
; GCN-LABEL: {{^}}fneg_fabs_fmul_f16:
; CI-DAG: v_cvt_f32_f16_e32
; CI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG_ABS_X:v[0-9]+]], -|{{v[0-9]+}}|
-; CI: v_mul_f32_e32 {{v[0-9]+}}, [[CVT_NEG_ABS_X]], {{v[0-9]+}}
+; CI: v_mul_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, [[CVT_NEG_ABS_X]]
; CI: v_cvt_f16_f32_e32
; GFX89-NOT: _and
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg.f16.ll Mon Jul 10 12:53:57 2017
@@ -46,7 +46,7 @@ define amdgpu_kernel void @fneg_free_f16
; CI-DAG: v_cvt_f32_f16_e32 [[CVT_VAL:v[0-9]+]], [[NEG_VALUE]]
; CI-DAG: v_cvt_f32_f16_e64 [[NEG_CVT0:v[0-9]+]], -[[NEG_VALUE]]
-; CI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[CVT_VAL]], [[NEG_CVT0]]
+; CI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[NEG_CVT0]], [[CVT_VAL]]
; CI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[MUL]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/fpext.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fpext.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fpext.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fpext.f16.ll Mon Jul 10 12:53:57 2017
@@ -154,7 +154,7 @@ entry:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f32_f16_e64 [[CVTA_NEG:v[0-9]+]], -[[A]]
; SI-DAG: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
-; SI: v_mul_f32_e32 [[MUL_F32:v[0-9]+]], [[CVTA]], [[CVTA_NEG]]
+; SI: v_mul_f32_e32 [[MUL_F32:v[0-9]+]], [[CVTA_NEG]], [[CVTA]]
; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
; GFX89-DAG: v_cvt_f32_f16_e64 [[CVT_NEGA:v[0-9]+]], -[[A]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/fptosi.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fptosi.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fptosi.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fptosi.f16.ll Mon Jul 10 12:53:57 2017
@@ -60,7 +60,7 @@ entry:
; SI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
; SI: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]]
; SI: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]]
-; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_LO]]
+; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_LO]], v[[R_I16_HI]]
; VI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; VI: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
Modified: llvm/trunk/test/CodeGen/AMDGPU/fptoui.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fptoui.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fptoui.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fptoui.f16.ll Mon Jul 10 12:53:57 2017
@@ -60,7 +60,7 @@ entry:
; SI: v_cvt_u32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
; SI: v_cvt_u32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]]
; SI: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]]
-; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_HI]], v[[R_I16_0]]
+; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_HI]]
; VI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_cvt_f32_f16_sdwa v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
Modified: llvm/trunk/test/CodeGen/AMDGPU/fptrunc.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fptrunc.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fptrunc.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fptrunc.f16.ll Mon Jul 10 12:53:57 2017
@@ -38,10 +38,10 @@ entry:
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_cvt_f16_f32_sdwa v[[R_F16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
@@ -68,7 +68,7 @@ entry:
; VI: v_cvt_f16_f32_sdwa v[[R_F16_HI:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/fract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fract.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fract.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fract.ll Mon Jul 10 12:53:57 2017
@@ -9,7 +9,7 @@ declare float @llvm.floor.f32(float) #0
; GCN-LABEL: {{^}}fract_f32:
; GCN-SAFE: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
-; GCN-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
+; GCN-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[INPUT]], [[FLR]]
; GCN-UNSAFE: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/frem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frem.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frem.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/frem.ll Mon Jul 10 12:53:57 2017
@@ -29,7 +29,7 @@ define amdgpu_kernel void @frem_f32(floa
; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}}
; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
-; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
+; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[X]], [[INVY]]
; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
; GCN: buffer_store_dword [[RESULT]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/fsub.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fsub.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fsub.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fsub.f16.ll Mon Jul 10 12:53:57 2017
@@ -7,9 +7,9 @@
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_subrev_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
+; SI: v_sub_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; GFX89: v_subrev_f16_e32 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]]
+; GFX89: v_sub_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fsub_f16(
@@ -70,16 +70,16 @@ entry:
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
-; SI: v_subrev_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]]
-; SI: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
+; SI: v_sub_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]]
+; SI: v_sub_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
-; VI-DAG: v_subrev_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1]
@@ -109,12 +109,12 @@ entry:
; SI: v_sub_f32_e32 v[[R_F32_1:[0-9]+]], 2.0, v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x40003c00
; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[B_V2_F16]], [[K]] neg_lo:[1,0] neg_hi:[1,0]
@@ -143,12 +143,12 @@ entry:
; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], -1.0, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]]
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00c000
; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], [[K]]{{$}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/fsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fsub.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fsub.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fsub.ll Mon Jul 10 12:53:57 2017
@@ -3,7 +3,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}v_fsub_f32:
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
%a = load float, float addrspace(1)* %in, align 4
@@ -41,10 +41,10 @@ define amdgpu_kernel void @fsub_v2f32(<2
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
@@ -67,7 +67,7 @@ define amdgpu_kernel void @s_fsub_v4f32(
}
; FUNC-LABEL: {{^}}v_fneg_fsub_f32:
-; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
define amdgpu_kernel void @v_fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
@@ -80,7 +80,7 @@ define amdgpu_kernel void @v_fneg_fsub_f
}
; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_f32:
-; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; SI-NOT: xor
define amdgpu_kernel void @v_fneg_fsub_nsz_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
@@ -93,7 +93,7 @@ define amdgpu_kernel void @v_fneg_fsub_n
}
; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
-; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; SI-NOT: xor
define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
@@ -109,7 +109,7 @@ define amdgpu_kernel void @v_fneg_fsub_n
; make sure it is disabled and the fneg is not folded if it is not
; "true".
; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
-; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/half.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/half.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/half.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/half.ll Mon Jul 10 12:53:57 2017
@@ -17,7 +17,7 @@ define amdgpu_kernel void @load_f16_arg(
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
-; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[HI]], [[V0]]
+; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
@@ -471,10 +471,10 @@ define amdgpu_kernel void @global_truncs
; SI-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; SI-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
-; SI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[SHL]], [[CVT0]]
+; SI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[SHL]]
; VI-DAG: v_cvt_f16_f32_sdwa [[CVT1:v[0-9]+]], v[[HI]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT1]], [[CVT0]]
+; VI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[CVT1]]
; GCN-DAG: buffer_store_dword [[PACKED]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/immv216.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/immv216.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/immv216.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/immv216.ll Mon Jul 10 12:53:57 2017
@@ -305,7 +305,7 @@ define amdgpu_kernel void @commute_add_i
; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
; VI-DAG: buffer_load_dword
; VI-NOT: and
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}
+; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: buffer_store_dword
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.ceil.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.ceil.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.ceil.f16.ll Mon Jul 10 12:53:57 2017
@@ -33,12 +33,12 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: and
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_ceil_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_ceil_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.cos.f16.ll Mon Jul 10 12:53:57 2017
@@ -29,8 +29,8 @@ entry:
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; SI-DAG: v_mul_f32_e32 v[[M_F32_0:[0-9]+]], v[[HALF_PIE]], v[[A_F32_0]]
-; SI-DAG: v_mul_f32_e32 v[[M_F32_1:[0-9]+]], v[[HALF_PIE]], v[[A_F32_1]]
+; SI-DAG: v_mul_f32_e32 v[[M_F32_0:[0-9]+]], v[[A_F32_0]], v[[HALF_PIE]]
+; SI-DAG: v_mul_f32_e32 v[[M_F32_1:[0-9]+]], v[[A_F32_1]], v[[HALF_PIE]]
; VI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
@@ -48,8 +48,8 @@ entry:
; GCN-NOT: and
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @cos_v2f16(
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.exp2.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.exp2.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.exp2.f16.ll Mon Jul 10 12:53:57 2017
@@ -33,12 +33,12 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: and
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_exp_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_exp_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.floor.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.floor.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.floor.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.floor.f16.ll Mon Jul 10 12:53:57 2017
@@ -33,12 +33,12 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: and
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_floor_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_floor_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.fma.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.fma.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.fma.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.fma.f16.ll Mon Jul 10 12:53:57 2017
@@ -128,7 +128,7 @@ define amdgpu_kernel void @fma_f16_imm_c
; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v2f16(
@@ -167,7 +167,7 @@ define amdgpu_kernel void @fma_v2f16(
; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v2f16_imm_a(
@@ -210,7 +210,7 @@ define amdgpu_kernel void @fma_v2f16_imm
; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v2f16_imm_b(
@@ -253,7 +253,7 @@ define amdgpu_kernel void @fma_v2f16_imm
; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[A_F16_1]], v[[B_F16_1]], v[[C_F16]]
; GCN-NOT: and
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v2f16_imm_c(
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll Mon Jul 10 12:53:57 2017
@@ -13,11 +13,11 @@ declare <2 x half> @llvm.fmuladd.v2f16(<
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
-; SI: v_mac_f32_e32 v[[C_F32]], v[[B_F32]], v[[A_F32]]
+; SI: v_mac_f32_e32 v[[C_F32]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]
-; VI-FLUSH: v_mac_f16_e32 v[[C_F16]], v[[B_F16]], v[[A_F16]]
+; VI-FLUSH: v_mac_f16_e32 v[[C_F16]], v[[A_F16]], v[[B_F16]]
; VI-FLUSH: buffer_store_short v[[C_F16]]
; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
@@ -110,19 +110,19 @@ define amdgpu_kernel void @fmuladd_f16_i
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
-; SI: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
-; SI: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
+; SI: v_mac_f32_e32 v[[C_F32_0]], v[[A_F32_0]], v[[B_F32_0]]
+; SI: v_mac_f32_e32 v[[C_F32_1]], v[[A_F32_1]], v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[C_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_LO]], v[[R_F16_HI]]
; VI-FLUSH: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[B_V2_F16]], v[[C_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-FLUSH-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]]
+; VI-FLUSH-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[B_V2_F16]], v[[C_V2_F16]]
; VI-FLUSH-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]]
; VI-FLUSH-NOT: v_and_b32
-; VI-FLUSH: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[A_V2_F16]]
+; VI-FLUSH: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[R_F16_HI]]
; VI-DENORM: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; VI-DENORM: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
@@ -131,7 +131,7 @@ define amdgpu_kernel void @fmuladd_f16_i
; VI-DENORM-DAG: v_fma_f16 v[[RES1:[0-9]+]], v[[A_F16_1]], v[[B_F16_1]], v[[C_F16_1]]
; VI-DENORM-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[RES1]]
; VI-DENORM-NOT: v_and_b32
-; VI-DENORM: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[RES0]]
+; VI-DENORM: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[RES0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.log2.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.log2.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.log2.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.log2.f16.ll Mon Jul 10 12:53:57 2017
@@ -33,12 +33,12 @@ entry:
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: and
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_log_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_log_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll Mon Jul 10 12:53:57 2017
@@ -9,9 +9,9 @@ declare <2 x half> @llvm.maxnum.v2f16(<2
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_max_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
+; SI: v_max_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_max_f16_e32 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]]
+; VI: v_max_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_f16(
@@ -73,18 +73,18 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
-; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]]
-; SI-DAG: v_max_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
+; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]]
+; SI-DAG: v_max_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: and
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
-; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -115,7 +115,7 @@ entry:
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_v2f16_imm_a(
@@ -143,7 +143,7 @@ entry:
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_v2f16_imm_b(
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.minnum.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.minnum.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.minnum.f16.ll Mon Jul 10 12:53:57 2017
@@ -9,9 +9,9 @@ declare <2 x half> @llvm.minnum.v2f16(<2
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_min_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
+; SI: v_min_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_min_f16_e32 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]]
+; VI: v_min_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @minnum_f16(
@@ -72,18 +72,18 @@ entry:
; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
-; SI: v_min_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]]
-; SI-DAG: v_min_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
+; SI: v_min_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]]
+; SI-DAG: v_min_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: and
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
-; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -116,7 +116,7 @@ entry:
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @minnum_v2f16_imm_a(
@@ -144,7 +144,7 @@ entry:
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
-; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @minnum_v2f16_imm_b(
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.rint.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.rint.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.rint.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.rint.f16.ll Mon Jul 10 12:53:57 2017
@@ -34,12 +34,12 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: v_and_b32
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_rndne_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_rndne_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NOT: v_and_b32
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GFX9: v_rndne_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; GFX9: v_rndne_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.round.ll Mon Jul 10 12:53:57 2017
@@ -12,7 +12,7 @@
; GCN: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
; GCN: v_cmp_ge_f32_e64 vcc, |[[SUB]]|, 0.5
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[VX]]
-; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
+; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TRUNC]], [[SEL]]
; GCN: buffer_store_dword [[RESULT]]
; R600: TRUNC {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
@@ -70,7 +70,7 @@ define amdgpu_kernel void @round_v8f32(<
; GFX89: v_sub_f16_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
; GFX89: v_cmp_ge_f16_e64 vcc, |[[SUB]]|, 0.5
; GFX89: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[COPYSIGN]]
-; GFX89: v_add_f16_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
+; GFX89: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TRUNC]], [[SEL]]
; GFX89: buffer_store_short [[RESULT]]
define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 {
%x.arg.trunc = trunc i32 %x.arg to i16
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.sin.f16.ll Mon Jul 10 12:53:57 2017
@@ -29,9 +29,9 @@ entry:
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; SI-DAG: v_mul_f32_e32 v[[M_F32_0:[0-9]+]], v[[HALF_PIE]], v[[A_F32_0]]
+; SI-DAG: v_mul_f32_e32 v[[M_F32_0:[0-9]+]], v[[A_F32_0]], v[[HALF_PIE]]
; SI-DAG: v_fract_f32_e32 v[[F_F32_0:[0-9]+]], v[[M_F32_0]]
-; SI-DAG: v_mul_f32_e32 v[[M_F32_1:[0-9]+]], v[[HALF_PIE]], v[[A_F32_1]]
+; SI-DAG: v_mul_f32_e32 v[[M_F32_1:[0-9]+]], v[[A_F32_1]], v[[HALF_PIE]]
; SI-DAG: v_fract_f32_e32 v[[F_F32_1:[0-9]+]], v[[M_F32_1]]
; VI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
@@ -47,10 +47,10 @@ entry:
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_cvt_f16_f32_sdwa v[[R_F16_1:[0-9]+]], v[[R_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll Mon Jul 10 12:53:57 2017
@@ -33,12 +33,12 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: v_and_b32
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_sqrt_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_sqrt_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NOT: v_and_b32
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.trunc.f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.trunc.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.trunc.f16.ll Mon Jul 10 12:53:57 2017
@@ -33,12 +33,12 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: v_and_b32
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; VI-DAG: v_trunc_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_trunc_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NOT: v_and_b32
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll Mon Jul 10 12:53:57 2017
@@ -19,15 +19,15 @@ declare float @llvm.fmuladd.f32(float, f
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; SI-STD: v_mac_f32_e32 [[C]], [[A]], [[B]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-SLOWFMAF-NOT: v_fma
; SI-DENORM-SLOWFMAF-NOT: v_mad
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
+; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
@@ -55,15 +55,15 @@ define amdgpu_kernel void @combine_to_ma
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; SI-STD-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
-; SI-STD-DAG: v_mac_f32_e32 [[D]], [[B]], [[A]]
+; SI-STD-DAG: v_mac_f32_e32 [[C]], [[A]], [[B]]
+; SI-STD-DAG: v_mac_f32_e32 [[D]], [[A]], [[B]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
-; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
+; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
+; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
; SI-DENORM-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DENORM-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
@@ -99,11 +99,11 @@ define amdgpu_kernel void @combine_to_ma
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; SI-STD: v_mac_f32_e32 [[C]], [[A]], [[B]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
+; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
@@ -133,8 +133,8 @@ define amdgpu_kernel void @combine_to_ma
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
+; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
@@ -167,9 +167,9 @@ define amdgpu_kernel void @combine_to_ma
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
@@ -205,8 +205,8 @@ define amdgpu_kernel void @combine_to_ma
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
+; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
@@ -238,9 +238,9 @@ define amdgpu_kernel void @combine_to_ma
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
@@ -278,7 +278,7 @@ define amdgpu_kernel void @combine_to_ma
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
+; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
@@ -313,8 +313,8 @@ define amdgpu_kernel void @combine_to_ma
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
@@ -355,9 +355,9 @@ define amdgpu_kernel void @combine_to_ma
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[A]], [[B]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
-; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
@@ -395,13 +395,13 @@ define amdgpu_kernel void @combine_to_ma
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
-; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
+; SI-STD: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]
-; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
-; SI-DENORM: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]]
+; SI-DENORM: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP1]], [[C]]
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
@@ -437,13 +437,13 @@ define amdgpu_kernel void @aggressive_co
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
-; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
+; SI-STD: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
-; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
-; SI-DENORM: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
+; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
@@ -479,21 +479,21 @@ define amdgpu_kernel void @aggressive_co
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[A]]
-; SI-STD-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP0]]
+; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
+; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[A]], [[B]]
+; SI-STD-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP0]], [[C]]
; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-STD-UNSAFE: v_mac_f32_e32 [[RESULT]], [[B]], [[A]]
+; SI-STD-UNSAFE: v_mac_f32_e32 [[RESULT]], [[A]], [[B]]
-; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
-; SI-DENORM-FASTFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
+; SI-DENORM-FASTFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]
-; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
-; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]
+; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
+; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[A]], [[B]]
+; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP1]], [[TMP0]]
+; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[C]]
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
@@ -530,21 +530,21 @@ define amdgpu_kernel void @aggressive_co
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[C]], [[B]]
-; SI-STD-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP0]], [[A]]
+; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
+; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[C]]
+; SI-STD-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP0]]
; SI-STD-UNSAFE: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
-; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
-; SI-DENORM-FASTFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
+; SI-DENORM-FASTFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP1]]
-; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
-; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[A]]
+; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
+; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[C]]
+; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP1]], [[TMP0]]
+; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP2]]
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
Modified: llvm/trunk/test/CodeGen/AMDGPU/madak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madak.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madak.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madak.ll Mon Jul 10 12:53:57 2017
@@ -34,8 +34,8 @@ define amdgpu_kernel void @madak_f32(flo
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VB]], [[VA]], [[VK]]
-; GCN-DAG: v_mac_f32_e32 [[VK]], [[VC]], [[VA]]
+; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]]
+; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
; GCN: s_endpgm
define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -199,7 +199,7 @@ define amdgpu_kernel void @no_madak_src1
; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
; GCN: buffer_load_dword [[VGPR:v[0-9]+]]
; GCN: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
-; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VGPR]], [[MADAK]]
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 {
bb:
Modified: llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madmk.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madmk.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madmk.ll Mon Jul 10 12:53:57 2017
@@ -32,8 +32,8 @@ define amdgpu_kernel void @madmk_f32(flo
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mac_f32_e32 [[VB]], [[VK]], [[VA]]
-; GCN-DAG: v_mac_f32_e32 [[VC]], [[VK]], [[VA]]
+; GCN-DAG: v_mac_f32_e32 [[VB]], [[VA]], [[VK]]
+; GCN-DAG: v_mac_f32_e32 [[VC]], [[VA]], [[VK]]
; GCN: s_endpgm
define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
Modified: llvm/trunk/test/CodeGen/AMDGPU/rsq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rsq.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rsq.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rsq.ll Mon Jul 10 12:53:57 2017
@@ -48,8 +48,8 @@ define amdgpu_kernel void @rsq_f32_sgpr(
; SI-UNSAFE-DAG: v_rsq_f32_e32 [[RSQA:v[0-9]+]], [[A]]
; SI-UNSAFE-DAG: v_rcp_f32_e32 [[RCPB:v[0-9]+]], [[B]]
-; SI-UNSAFE-DAG: v_mul_f32_e32 [[TMP:v[0-9]+]], [[RCPB]], [[RSQA]]
-; SI-UNSAFE: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
+; SI-UNSAFE-DAG: v_mul_f32_e32 [[TMP:v[0-9]+]], [[RSQA]], [[RCPB]]
+; SI-UNSAFE: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
; SI-UNSAFE: buffer_store_dword [[RESULT]]
; SI-SAFE-NOT: v_rsq_f32
Modified: llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll Mon Jul 10 12:53:57 2017
@@ -6,7 +6,7 @@
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 16, [[VAL]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[SHR]]
-; GCN: v_or_b32_e32 v[[OR:[0-9]+]], [[SHL]], [[SHR]]
+; GCN: v_or_b32_e32 v[[OR:[0-9]+]], [[SHR]], [[SHL]]
; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[OR]]
; GCN: buffer_store_dwordx2 v{{\[}}[[OR]]:[[COPY]]{{\]}}
define amdgpu_kernel void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
Modified: llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll Mon Jul 10 12:53:57 2017
@@ -14,8 +14,8 @@
; GCN-DAG: v_mov_b32_e32 [[C200:v[0-9]+]], 0x200
; GCN-DAG: v_mov_b32_e32 [[C400:v[0-9]+]], 0x400
-; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[C200]], [[CLAMP_IDX]]
-; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[C400]], [[CLAMP_IDX]]
+; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[C200]]
+; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[C400]]
; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll Mon Jul 10 12:53:57 2017
@@ -35,7 +35,7 @@ define amdgpu_kernel void @sub_shr_i32(i
; GCN-LABEL: {{^}}mul_shr_i32:
; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}}
-; NOSDWA: v_mul_u32_u24_e32 v{{[0-9]+}}, v[[DST1]], v[[DST0]]
+; NOSDWA: v_mul_u32_u24_e32 v{{[0-9]+}}, v[[DST0]], v[[DST1]]
; NOSDWA-NOT: v_mul_u32_u24_sdwa
; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
@@ -68,9 +68,9 @@ entry:
; GCN-LABEL: {{^}}mul_v2i16:
; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}}
-; NOSDWA: v_mul_u32_u24_e32 v[[DST_MUL:[0-9]+]], v[[DST1]], v[[DST0]]
+; NOSDWA: v_mul_u32_u24_e32 v[[DST_MUL:[0-9]+]], v[[DST0]], v[[DST1]]
; NOSDWA: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MUL]]
-; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v[[DST_SHL]], v{{[0-9]+}}
+; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[DST_SHL]]
; NOSDWA-NOT: v_mul_u32_u24_sdwa
; VI-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
@@ -168,14 +168,14 @@ entry:
; GCN-LABEL: {{^}}mul_v2half:
; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}}
-; NOSDWA: v_mul_f16_e32 v[[DST_MUL:[0-9]+]], v[[DST1]], v[[DST0]]
+; NOSDWA: v_mul_f16_e32 v[[DST_MUL:[0-9]+]], v[[DST0]], v[[DST1]]
; NOSDWA: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MUL]]
-; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v[[DST_SHL]], v{{[0-9]+}}
+; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[DST_SHL]]
; NOSDWA-NOT: v_mul_f16_sdwa
; VI-DAG: v_mul_f16_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL_HI]], v[[DST_MUL_LO]]
+; VI: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]]
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -362,9 +362,9 @@ entry:
; GCN-LABEL: {{^}}mac_v2half:
; NOSDWA: v_lshrrev_b32_e32 v[[DST0:[0-9]+]], 16, v{{[0-9]+}}
; NOSDWA: v_lshrrev_b32_e32 v[[DST1:[0-9]+]], 16, v{{[0-9]+}}
-; NOSDWA: v_mac_f16_e32 v[[DST_MAC:[0-9]+]], v[[DST1]], v[[DST0]]
+; NOSDWA: v_mac_f16_e32 v[[DST_MAC:[0-9]+]], v[[DST0]], v[[DST1]]
; NOSDWA: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MAC]]
-; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v[[DST_SHL]], v{{[0-9]+}}
+; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[DST_SHL]]
; NOSDWA-NOT: v_mac_f16_sdwa
; VI: v_mac_f16_sdwa v[[DST_MAC:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
@@ -491,7 +491,7 @@ entry:
%tmp17 = shufflevector <2 x i8> %tmp10, <2 x i8> %tmp12, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tmp18 = shufflevector <2 x i8> %tmp14, <2 x i8> %tmp16, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tmp19 = shufflevector <4 x i8> %tmp17, <4 x i8> %tmp18, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-
+
%arrayidx5 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %destValues, i64 %idxprom
store <8 x i8> %tmp19, <8 x i8> addrspace(1)* %arrayidx5, align 8
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll Mon Jul 10 12:53:57 2017
@@ -103,7 +103,7 @@ define amdgpu_kernel void @add_select_mu
; GCN: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_ABS]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -122,7 +122,7 @@ define amdgpu_kernel void @add_select_fa
; GCN: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -154,7 +154,7 @@ define amdgpu_kernel void @add_select_fa
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -171,7 +171,7 @@ define amdgpu_kernel void @add_select_po
; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -191,7 +191,7 @@ define amdgpu_kernel void @add_select_ne
; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[FABS_X]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -245,7 +245,7 @@ define amdgpu_kernel void @add_select_po
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -266,8 +266,8 @@ define amdgpu_kernel void @add_select_fn
; GCN: buffer_load_dword [[W:v[0-9]+]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
-; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[X]], [[W]]
+; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
+; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -291,7 +291,7 @@ define amdgpu_kernel void @add_select_mu
; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
-; GCN-DAG: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[SELECT]], [[Z]]
+; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]
; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[NEG_X]]
@@ -316,8 +316,8 @@ define amdgpu_kernel void @add_select_mu
; GCN: buffer_load_dword [[W:v[0-9]+]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
-; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[Y]], [[W]]
+; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
+; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -341,7 +341,7 @@ define amdgpu_kernel void @add_select_mu
; GCN: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_NEG]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -359,7 +359,7 @@ define amdgpu_kernel void @add_select_fn
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -377,7 +377,7 @@ define amdgpu_kernel void @add_select_fn
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -397,7 +397,7 @@ define amdgpu_kernel void @add_select_fn
; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -414,7 +414,7 @@ define amdgpu_kernel void @add_select_fn
; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -431,7 +431,7 @@ define amdgpu_kernel void @add_select_ne
; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -445,7 +445,7 @@ define amdgpu_kernel void @add_select_ne
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -462,7 +462,7 @@ define amdgpu_kernel void @add_select_fn
; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -479,7 +479,7 @@ define amdgpu_kernel void @add_select_ne
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -497,7 +497,7 @@ define amdgpu_kernel void @add_select_fn
; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -517,7 +517,7 @@ define amdgpu_kernel void @add_select_po
; GCN-DAG: v_or_b32_e32 [[X_NEG_ABS:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG_ABS]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -540,7 +540,7 @@ define amdgpu_kernel void @add_select_ne
; GCN-DAG: v_or_b32_e32 [[Y_NEG_ABS:v[0-9]+]], 0x80000000, [[Y]]
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG_ABS]], [[X_ABS]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -563,7 +563,7 @@ define amdgpu_kernel void @add_select_fa
; GCN-DAG: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -585,7 +585,7 @@ define amdgpu_kernel void @add_select_ne
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_xor_b32_e32 [[Y_NEG:v[0-9]+]], 0x80000000, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG]], [[X_ABS]], vcc
-; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -606,7 +606,7 @@ define amdgpu_kernel void @add_select_fa
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -628,7 +628,7 @@ define amdgpu_kernel void @add_select_ne
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[X_ABS]], [[Y]], vcc
-; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
+; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
Modified: llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll Mon Jul 10 12:53:57 2017
@@ -7,7 +7,7 @@
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]
-; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @multi_use_fneg_src() #0 {
@@ -30,7 +30,7 @@ define amdgpu_kernel void @multi_use_fne
; GCN: buffer_load_dword [[B:v[0-9]+]]
; GCN: buffer_load_dword [[C:v[0-9]+]]
-; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[A]]
; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
@@ -78,7 +78,7 @@ define amdgpu_kernel void @multi_use_fne
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: buffer_load_dword [[B:v[0-9]+]]
-; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[B]], [[A]]
+; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
; GCN: buffer_store_dword [[MUL1]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll Mon Jul 10 12:53:57 2017
@@ -98,7 +98,7 @@ define amdgpu_kernel void @v_uextract_bi
; GCN-DAG: v_lshrrev_b32_e32 v[[ELT1PART:[0-9]+]], 2, v{{[[0-9]+}}
; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[SHLLO]], v[[ELT1PART]]
+; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[ELT1PART]], v[[SHLLO]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}}
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir Mon Jul 10 12:53:57 2017
@@ -343,7 +343,7 @@ body: |
...
---
# GCN-LABEL: name: shrink_addc_vop3{{$}}
-# GCN: %29 = V_ADDC_U32_e32 %17, %19, implicit-def %vcc, implicit %vcc, implicit %exec
+# GCN: %29 = V_ADDC_U32_e32 %19, %17, implicit-def %vcc, implicit %vcc, implicit %exec
# GCN %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
name: shrink_addc_vop3
@@ -429,7 +429,7 @@ body: |
---
# GCN-LABEL: name: shrink_addc_undef_vcc{{$}}
-# GCN: %29 = V_ADDC_U32_e32 %17, %19, implicit-def %vcc, implicit undef %vcc, implicit %exec
+# GCN: %29 = V_ADDC_U32_e32 %19, %17, implicit-def %vcc, implicit undef %vcc, implicit %exec
# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
name: shrink_addc_undef_vcc
alignment: 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll Mon Jul 10 12:53:57 2017
@@ -18,7 +18,7 @@ define amdgpu_kernel void @s_abs_i32(i32
; FUNC-LABEL: {{^}}v_abs_i32:
; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
; GCN: v_add_i32
; EG: MAX_INT
@@ -34,7 +34,7 @@ define amdgpu_kernel void @v_abs_i32(i32
; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
-; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
+; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]]
define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
%val = load i32, i32 addrspace(1)* %src, align 4
@@ -71,8 +71,8 @@ define amdgpu_kernel void @s_abs_v2i32(<
; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
; GCN: v_add_i32
; GCN: v_add_i32
@@ -132,10 +132,10 @@ define amdgpu_kernel void @s_abs_v4i32(<
; GCN-DAG: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
; GCN-DAG: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
-; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
; GCN: v_add_i32
; GCN: v_add_i32
@@ -184,8 +184,8 @@ define amdgpu_kernel void @s_min_max_i32
; GCN: {{buffer|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_min_i32_e32 v{{[0-9]+}}, [[VAL1]], [[VAL0]]
-; GCN-DAG: v_max_i32_e32 v{{[0-9]+}}, [[VAL1]], [[VAL0]]
+; GCN-DAG: v_min_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]]
+; GCN-DAG: v_max_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]]
define amdgpu_kernel void @v_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
%val0 = load volatile i32, i32 addrspace(1)* %ptr0
%val1 = load volatile i32, i32 addrspace(1)* %ptr1
Modified: llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll Mon Jul 10 12:53:57 2017
@@ -5,7 +5,7 @@
; GCN-LABEL: {{^}}v_test_sub_i16:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_short [[ADD]]
define amdgpu_kernel void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -68,7 +68,7 @@ define amdgpu_kernel void @v_test_sub_i1
; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i32:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -88,7 +88,7 @@ define amdgpu_kernel void @v_test_sub_i1
; VI: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI-DAG: v_subrev_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
+; VI-DAG: v_sub_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -107,7 +107,7 @@ define amdgpu_kernel void @v_test_sub_i1
; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i32:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: buffer_store_dword [[SEXT]]
define amdgpu_kernel void @v_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
@@ -127,7 +127,7 @@ define amdgpu_kernel void @v_test_sub_i1
; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i64:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
+; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sub.v2i16.ll Mon Jul 10 12:53:57 2017
@@ -6,7 +6,7 @@
; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_subrev_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_sub_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -165,10 +165,10 @@ define amdgpu_kernel void @v_test_sub_v2
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
-; VI: v_subrev_u16_e32 v[[ADD_HI:[0-9]+]], v[[B_HI]], v[[A_HI]]
+; VI: v_sub_u16_e32 v[[ADD_HI:[0-9]+]], v[[A_HI]], v[[B_HI]]
; VI-NOT: and
; VI-NOT: shl
-; VI: v_subrev_u16_e32 v[[ADD_LO:[0-9]+]], v[[B_LO]], v[[A_LO]]
+; VI: v_sub_u16_e32 v[[ADD_LO:[0-9]+]], v[[A_LO]], v[[B_LO]]
; VI-NOT: and
; VI-NOT: shl
; VI: buffer_store_dwordx2 v{{\[}}[[ADD_LO]]:[[ADD_HI]]{{\]}}
@@ -201,8 +201,8 @@ define amdgpu_kernel void @v_test_sub_v2
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
-; VI-DAG: v_subrev_u16_e32
-; VI-DAG: v_subrev_u16_e32
+; VI: v_sub_u16_e32
+; VI: v_sub_u16_e32
; VI: buffer_store_dwordx4
define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
@@ -228,8 +228,8 @@ define amdgpu_kernel void @v_test_sub_v2
; GFX9-DAG: v_ashrrev_i32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}
-; VI: v_subrev_u16_e32
-; VI: v_subrev_u16_e32
+; VI: v_sub_u16_e32
+; VI: v_sub_u16_e32
; VI: buffer_store_dwordx2
define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -253,7 +253,7 @@ define amdgpu_kernel void @v_test_sub_v2
; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI: v_subrev_u16_e32
+; VI: v_sub_u16_e32
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
Modified: llvm/trunk/test/CodeGen/AMDGPU/usubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/usubo.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/usubo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/usubo.ll Mon Jul 10 12:53:57 2017
@@ -120,7 +120,7 @@ define amdgpu_kernel void @v_usubo_i64(i
}
; FUNC-LABEL: {{^}}v_usubo_i16:
-; VI: v_subrev_u16_e32
+; VI: v_sub_u16_e32
; VI: v_cmp_gt_u16_e32
define amdgpu_kernel void @v_usubo_i16(i16 addrspace(1)* %out, i1 addrspace(1)* %carryout, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Modified: llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll Mon Jul 10 12:53:57 2017
@@ -6,7 +6,7 @@
; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
; GCN: buffer_load_dword [[B:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4
; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
-; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; GCN: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN: buffer_store_dword [[C]]
define amdgpu_kernel void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
@@ -135,7 +135,7 @@ entry:
; GCN-LABEL: {{^}}safe_mad_sub0_src0:
; GCN: v_sub_f32_e32 [[SUB0:v[0-9]+]], 0,
-; GCN: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[SUB0]]
+; GCN: v_mac_f32_e32 v{{[0-9]+}}, [[SUB0]], v{{[0-9]+}}
define amdgpu_kernel void @safe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/v_mac_f16.ll Mon Jul 10 12:53:57 2017
@@ -8,10 +8,10 @@
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
-; SI: v_mac_f32_e32 v[[C_F32]], v[[B_F32]], v[[A_F32]]
+; SI: v_mac_f32_e32 v[[C_F32]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]
-; VI: v_mac_f16_e32 v[[C_F16]], v[[B_F16]], v[[A_F16]]
+; VI: v_mac_f16_e32 v[[C_F16]], v[[A_F16]], v[[B_F16]]
; VI: buffer_store_short v[[C_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16(
@@ -147,9 +147,9 @@ entry:
; GCN-LABEL: {{^}}mac_f16_neg_a_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
-; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
+; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
-; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
+; VI: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_a_safe_fp_math(
half addrspace(1)* %r,
@@ -171,9 +171,9 @@ entry:
; GCN-LABEL: {{^}}mac_f16_neg_b_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
-; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
; VI: v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
-; VI: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
+; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
; GCN: s_endpgm
define amdgpu_kernel void @mac_f16_neg_b_safe_fp_math(
half addrspace(1)* %r,
@@ -312,20 +312,20 @@ entry:
; SI: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
-; SI-DAG: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
+; SI-DAG: v_mac_f32_e32 v[[C_F32_0]], v[[A_F32_0]], v[[B_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]]
-; SI-DAG: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
+; SI-DAG: v_mac_f32_e32 v[[C_F32_1]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[C_F32_1]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; VI-NOT: and
-; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
+; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_LO]], v[[R_F16_HI]]
; VI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; VI-DAG: v_mac_f16_sdwa v[[C_F16_1]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_mac_f16_e32 v[[C_V2_F16]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]
; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[C_V2_F16]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[C_V2_F16]], v[[R_F16_HI]]
; GCN: {{buffer|flat}}_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -481,14 +481,14 @@ entry:
; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
-; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
-; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI-DAG: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI-DAG: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
+; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_a_safe_fp_math(
@@ -513,14 +513,14 @@ entry:
; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
-; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
-; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
+; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; GCN: s_endpgm
define amdgpu_kernel void @mac_v2f16_neg_b_safe_fp_math(
Added: llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir?rev=307575&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir Mon Jul 10 12:53:57 2017
@@ -0,0 +1,40 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
+...
+# GCN-LABEL: name: fold_imm_non_ssa{{$}}
+# GCN: %0 = V_MOV_B32_e32 123, implicit %exec
+# GCN: %2 = V_ADD_I32_e32 456, %0, implicit-def %vcc, implicit %exec
+
+name: fold_imm_non_ssa
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: sreg_64 }
+body: |
+ bb.0:
+ %0 = COPY undef %0
+ %0 = V_MOV_B32_e32 123, implicit %exec
+ %1 = V_MOV_B32_e32 456, implicit %exec
+ %2, %vcc = V_ADD_I32_e64 %0, %1, implicit %exec
+ S_ENDPGM
+
+...
+# GCN-LABEL: name: fold_partially_defined_superreg{{$}}
+# GCN: %1 = V_MOV_B32_e32 456, implicit %exec
+# GCN: %2 = V_ADD_I32_e32 123, %1, implicit-def %vcc, implicit %exec
+name: fold_partially_defined_superreg
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vreg_64 }
+body: |
+ bb.0:
+ undef %3.sub0 = V_MOV_B32_e32 123, implicit %exec, implicit-def %3
+ %1 = V_MOV_B32_e32 456, implicit %exec
+ %2, %vcc = V_ADD_I32_e64 %3.sub0, %1, implicit %exec
+ S_ENDPGM
+
+...
Modified: llvm/trunk/test/CodeGen/AMDGPU/xor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/xor.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/xor.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/xor.ll Mon Jul 10 12:53:57 2017
@@ -60,7 +60,7 @@ define amdgpu_kernel void @xor_i1(float
; FUNC-LABEL: {{^}}v_xor_i1:
; SI: buffer_load_ubyte [[B:v[0-9]+]]
; SI: buffer_load_ubyte [[A:v[0-9]+]]
-; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[A]], [[B]]
+; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[B]], [[A]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/zext-i64-bit-operand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/zext-i64-bit-operand.ll?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/zext-i64-bit-operand.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/zext-i64-bit-operand.ll Mon Jul 10 12:53:57 2017
@@ -6,7 +6,7 @@
; GCN-NOT: _or_
; GCN-NOT: v[[HI]]
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
-; GCN: v_or_b32_e32 v[[LO]], v[[LD32]], v[[LO]]
+; GCN: v_or_b32_e32 v[[LO]], v[[LO]], v[[LD32]]
; GCN-NOT: _or_
; GCN-NOT: v[[HI]]
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
@@ -26,7 +26,7 @@ define amdgpu_kernel void @zext_or_opera
; GCN-NOT: _or_
; GCN-NOT: v[[HI]]
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
-; GCN: v_or_b32_e32 v[[LO]], v[[LD32]], v[[LO]]
+; GCN: v_or_b32_e32 v[[LO]], v[[LO]], v[[LD32]]
; GCN-NOT: v[[HI]]
; GCN-NOT: _or_
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir?rev=307575&r1=307574&r2=307575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir Mon Jul 10 12:53:57 2017
@@ -171,8 +171,8 @@ body: |
# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
# CHECK: %13 = V_MOV_B32_e32 1065353216, implicit %exec
-# CHECK: %14 = V_ADD_F16_e32 %13, killed %11, implicit %exec
-# CHECK: %15 = V_ADD_F16_e32 killed %13, killed %12, implicit %exec
+# CHECK: %14 = V_ADD_F16_e32 killed %11, %13, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 killed %12, killed %13, implicit %exec
name: add_f32_1.0_multi_f16_use
@@ -307,8 +307,8 @@ body: |
# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
# CHECK: %14 = V_MOV_B32_e32 1065353216, implicit %exec
-# CHECK: %15 = V_ADD_F16_e32 %14, %11, implicit %exec
-# CHECK: %16 = V_ADD_F16_e32 %14, %12, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 %11, %14, implicit %exec
+# CHECK: %16 = V_ADD_F16_e32 %12, %14, implicit %exec
# CHECK: %17 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec
name: add_f32_1.0_one_f32_use_multi_f16_use
@@ -514,8 +514,8 @@ body: |
# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
# CHECK: %13 = V_MOV_B32_e32 15360, implicit %exec
-# CHECK: %14 = V_ADD_F32_e32 %13, %11, implicit %exec
-# CHECK: %15 = V_ADD_F32_e32 %13, %12, implicit %exec
+# CHECK: %14 = V_ADD_F32_e32 %11, %13, implicit %exec
+# CHECK: %15 = V_ADD_F32_e32 %12, %13, implicit %exec
name: add_f16_1.0_multi_f32_use
alignment: 0
@@ -581,8 +581,8 @@ body: |
# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
# CHECK: %13 = V_MOV_B32_e32 80886784, implicit %exec
-# CHECK: %14 = V_ADD_F16_e32 %13, %11, implicit %exec
-# CHECK: %15 = V_ADD_F16_e32 %13, %12, implicit %exec
+# CHECK: %14 = V_ADD_F16_e32 %11, %13, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 %12, %13, implicit %exec
name: add_f16_1.0_other_high_bits_multi_f16_use
alignment: 0
@@ -648,8 +648,8 @@ body: |
# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
# CHECK: %13 = V_MOV_B32_e32 305413120, implicit %exec
-# CHECK: %14 = V_ADD_F32_e32 %13, %11, implicit %exec
-# CHECK: %15 = V_ADD_F16_e32 %13, %12, implicit %exec
+# CHECK: %14 = V_ADD_F32_e32 %11, %13, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 %12, %13, implicit %exec
name: add_f16_1.0_other_high_bits_use_f16_f32
alignment: 0
exposesReturnsTwice: false
More information about the llvm-commits
mailing list