[llvm] r242038 - AMDGPU/SI: Select mad patterns to v_mac_f32
Tom Stellard
thomas.stellard at amd.com
Mon Jul 13 08:47:58 PDT 2015
Author: tstellar
Date: Mon Jul 13 10:47:57 2015
New Revision: 242038
URL: http://llvm.org/viewvc/llvm-project?rev=242038&view=rev
Log:
AMDGPU/SI: Select mad patterns to v_mac_f32
The two-address instruction pass will convert these back to v_mad_f32
if necessary.
Differential Revision: http://reviews.llvm.org/D11060
Added:
llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/trunk/test/CodeGen/AMDGPU/fmuladd.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll
llvm/trunk/test/CodeGen/AMDGPU/mad-sub.ll
llvm/trunk/test/CodeGen/AMDGPU/madak.ll
llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Mon Jul 13 10:47:57 2015
@@ -110,8 +110,11 @@ private:
SDValue &Offset, SDValue &GLC) const;
SDNode *SelectAddrSpaceCast(SDNode *N);
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Omod) const;
@@ -1317,6 +1320,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ bool Res = SelectVOP3Mods(In, Src, SrcMods);
+ return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
@@ -1328,6 +1337,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0
return SelectVOP3Mods(In, Src, SrcMods);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods, SDValue &Clamp,
+ SDValue &Omod) const {
+ bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
+
+ return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
+ cast<ConstantSDNode>(Clamp)->isNullValue() &&
+ cast<ConstantSDNode>(Omod)->isNullValue();
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Omod) const {
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Mon Jul 13 10:47:57 2015
@@ -126,11 +126,42 @@ static bool updateOperand(FoldCandidate
return false;
}
+static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
+ const MachineInstr *MI) {
+ for (auto Candidate : FoldList) {
+ if (Candidate.UseMI == MI)
+ return true;
+ }
+ return false;
+}
+
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold,
const SIInstrInfo *TII) {
if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
+
+ // Special case for v_mac_f32_e64 if we are trying to fold into src2
+ unsigned Opc = MI->getOpcode();
+ if (Opc == AMDGPU::V_MAC_F32_e64 &&
+ (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
+ // Check if changing this to a v_mad_f32 instruction will allow us to
+ // fold the operand.
+ MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
+ bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
+ if (FoldAsMAD) {
+ MI->untieRegOperand(OpNo);
+ return true;
+ }
+ MI->setDesc(TII->get(Opc));
+ }
+
+ // If we are already folding into another operand of MI, then
+ // we can't commute the instruction, otherwise we risk making the
+ // other fold illegal.
+ if (isUseMIInFoldList(FoldList, MI))
+ return false;
+
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
unsigned CommuteIdx0;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Mon Jul 13 10:47:57 2015
@@ -924,7 +924,7 @@ bool SIInstrInfo::FoldImmediate(MachineI
return false;
unsigned Opc = UseMI->getOpcode();
- if (Opc == AMDGPU::V_MAD_F32) {
+ if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
// Don't fold if we are using source modifiers. The new VOP2 instructions
// don't have them.
if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
@@ -963,9 +963,9 @@ bool SIInstrInfo::FoldImmediate(MachineI
// instead of having to modify in place.
// Remove these first since they are at the end.
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::omod));
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::clamp));
unsigned Src1Reg = Src1->getReg();
@@ -980,6 +980,14 @@ bool SIInstrInfo::FoldImmediate(MachineI
Src1->setSubReg(Src2SubReg);
Src1->setIsKill(Src2->isKill());
+ if (Opc == AMDGPU::V_MAC_F32_e64) {
+ UseMI->untieRegOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ }
+
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src2));
+ // ChangingToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);
removeModOperands(*UseMI);
@@ -1010,11 +1018,17 @@ bool SIInstrInfo::FoldImmediate(MachineI
// instead of having to modify in place.
// Remove these first since they are at the end.
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::omod));
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::clamp));
+ if (Opc == AMDGPU::V_MAC_F32_e64) {
+ UseMI->untieRegOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ }
+
+ // ChangingToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);
// These come before src2.
@@ -1126,6 +1140,38 @@ bool SIInstrInfo::areMemAccessesTriviall
return false;
}
+MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
+ MachineBasicBlock::iterator &MI,
+ LiveVariables *LV) const {
+
+ switch (MI->getOpcode()) {
+ default: return nullptr;
+ case AMDGPU::V_MAC_F32_e64: break;
+ case AMDGPU::V_MAC_F32_e32: {
+ const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
+ if (Src0->isImm() && !isInlineConstant(*Src0, 4))
+ return nullptr;
+ break;
+ }
+ }
+
+ const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst);
+ const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
+ const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);
+
+ return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
+ .addOperand(*Dst)
+ .addImm(0) // Src0 mods
+ .addOperand(*Src0)
+ .addImm(0) // Src1 mods
+ .addOperand(*Src1)
+ .addImm(0) // Src mods
+ .addOperand(*Src2)
+ .addImm(0) // clamp
+ .addImm(0); // omod
+}
+
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
int64_t SVal = Imm.getSExtValue();
if (SVal >= -16 && SVal <= 64)
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Mon Jul 13 10:47:57 2015
@@ -144,6 +144,10 @@ public:
unsigned getMachineCSELookAheadLimit() const override { return 500; }
+ MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
+ MachineBasicBlock::iterator &MI,
+ LiveVariables *LV) const override;
+
bool isSALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Jul 13 10:47:57 2015
@@ -529,9 +529,11 @@ def MUBUFOffset : ComplexPattern<i64, 6,
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
+def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
+def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
//===----------------------------------------------------------------------===//
// SI assembler operands
@@ -1113,6 +1115,13 @@ def VOP_MADK : VOPProfile <[f32, f32, f3
field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
field string Asm = "$dst, $src0, $vsrc1, $src2";
}
+def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
+ let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
+ HasModifiers>.ret;
+ let Asm32 = getAsm32<2>.ret;
+ let Asm64 = getAsm64<2, HasModifiers>.ret;
+}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Mon Jul 13 10:47:57 2015
@@ -1488,7 +1488,10 @@ defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x
defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
-defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
+let Constraints = "$dst = $src2", DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1 in {
+defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>;
+}
} // End isCommutable = 1
defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">;
@@ -2206,6 +2209,15 @@ def : Pat <
(V_CNDMASK_B32_e64 $src2, $src1, $src0)
>;
+// Pattern for V_MAC_F32
+def : Pat <
+ (fmad (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3NoMods f32:$src1, i32:$src1_modifiers),
+ (VOP3NoMods f32:$src2, i32:$src2_modifiers)),
+ (V_MAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
+ $src2_modifiers, $src2, $clamp, $omod)
+>;
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Mon Jul 13 10:47:57 2015
@@ -94,8 +94,15 @@ static bool canShrink(MachineInstr &MI,
// is vcc. We should handle this the same way we handle vopc, by addding
// a register allocation hint pre-regalloc and then do the shrining
// post-regalloc.
- if (Src2)
- return false;
+ if (Src2) {
+ if (MI.getOpcode() != AMDGPU::V_MAC_F32_e64)
+ return false;
+
+ const MachineOperand *Src2Mod =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers);
+ if (!isVGPR(Src2, TRI, MRI) || (Src2Mod && Src2Mod->getImm() != 0))
+ return false;
+ }
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
const MachineOperand *Src1Mod =
@@ -259,6 +266,11 @@ bool SIShrinkInstructions::runOnMachineF
if (Src1)
Inst32.addOperand(*Src1);
+ const MachineOperand *Src2 =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (Src2)
+ Inst32.addOperand(*Src2);
+
++NumInstructionsShrunk;
MI.eraseFromParent();
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmuladd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmuladd.ll?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmuladd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmuladd.ll Mon Jul 13 10:47:57 2015
@@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() no
declare float @llvm.fabs.f32(float) nounwind readnone
; CHECK-LABEL: {{^}}fmuladd_f32:
-; CHECK: v_mad_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+; CHECK: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
@@ -34,8 +34,8 @@ define void @fmuladd_f64(double addrspac
; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -53,8 +53,8 @@ define void @fmuladd_2.0_a_b_f32(float a
; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -72,8 +72,8 @@ define void @fmuladd_a_2.0_b_f32(float a
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
@@ -94,8 +94,8 @@ define void @fadd_a_a_b_f32(float addrsp
; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
@@ -116,8 +116,8 @@ define void @fadd_b_a_a_f32(float addrsp
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -136,8 +136,8 @@ define void @fmuladd_neg_2.0_a_b_f32(flo
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -158,8 +158,8 @@ define void @fmuladd_neg_2.0_neg_a_b_f32
; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
-; CHECK: buffer_store_dword [[RESULT]]
+; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
+; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll Mon Jul 13 10:47:57 2015
@@ -5,7 +5,7 @@ declare float @llvm.AMDGPU.lrp(float, fl
; FUNC-LABEL: {{^}}test_lrp:
; SI: v_sub_f32
-; SI: v_mad_f32
+; SI: v_mac_f32_e32
define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
%mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
store float %mad, float addrspace(1)* %out, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll Mon Jul 13 10:47:57 2015
@@ -19,7 +19,7 @@ declare float @llvm.fmuladd.f32(float, f
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
@@ -29,7 +29,8 @@ declare float @llvm.fmuladd.f32(float, f
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
-; SI: buffer_store_dword [[RESULT]]
+; SI-DENORM: buffer_store_dword [[RESULT]]
+; SI-STD: buffer_store_dword [[C]]
define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -54,8 +55,8 @@ define void @combine_to_mad_f32_0(float
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
-; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
+; SI-STD-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; SI-STD-DAG: v_mac_f32_e32 [[D]], [[B]], [[A]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
@@ -64,8 +65,10 @@ define void @combine_to_mad_f32_0(float
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
-; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DENORM-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DENORM-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
@@ -96,13 +99,14 @@ define void @combine_to_mad_f32_0_2use(f
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
-; SI: buffer_store_dword [[RESULT]]
+; SI-DENORM: buffer_store_dword [[RESULT]]
+; SI-STD: buffer_store_dword [[C]]
define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -482,7 +486,7 @@ define void @aggressive_combine_to_mad_f
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
+; SI-STD: v_mac_f32_e32 [[TMP]], [[B]], [[A]]
; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
@@ -492,7 +496,8 @@ define void @aggressive_combine_to_mad_f
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]
-; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DENORM: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad-sub.ll?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad-sub.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad-sub.ll Mon Jul 13 10:47:57 2015
@@ -123,7 +123,7 @@ define void @mad_sub_fabs_inv_f32(float
}
; FUNC-LABEL: {{^}}neg_neg_mad_f32:
-; SI: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_mac_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
@@ -172,8 +172,8 @@ define void @mad_fabs_sub_f32(float addr
; FUNC-LABEL: {{^}}fsub_c_fadd_a_a:
; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
-; SI: buffer_store_dword [[RESULT]]
+; SI: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
+; SI: buffer_store_dword [[R2]]
define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/madak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madak.ll?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madak.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madak.ll Mon Jul 13 10:47:57 2015
@@ -9,7 +9,7 @@ declare float @llvm.fabs.f32(float) noun
; GCN-LABEL: {{^}}madak_f32:
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VB]], [[VA]], 0x41200000
+; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
@@ -34,8 +34,8 @@ define void @madak_f32(float addrspace(1
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]]
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VC]], [[VK]]
+; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VB]], [[VA]], [[VK]]
+; GCN-DAG: v_mac_f32_e32 [[VK]], [[VC]], [[VA]]
; GCN: s_endpgm
define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -105,7 +105,7 @@ define void @madak_inline_imm_f32(float
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, [[SB]], [[VA]], [[VK]]
+; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
@@ -124,7 +124,7 @@ define void @s_v_madak_f32(float addrspa
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[SB]], [[VK]]
+; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -140,7 +140,7 @@ define void @v_s_madak_f32(float addrspa
; GCN-LABEL: {{^}}s_s_madak_f32:
; GCN-NOT: v_madak_f32
-; GCN: v_mad_f32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; GCN: v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%mul = fmul float %a, %b
%madak = fadd float %mul, 10.0
Modified: llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madmk.ll?rev=242038&r1=242037&r2=242038&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madmk.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madmk.ll Mon Jul 13 10:47:57 2015
@@ -28,8 +28,8 @@ define void @madmk_f32(float addrspace(1
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VB]]
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VC]]
+; GCN-DAG: v_mac_f32_e32 [[VB]], [[VK]], [[VA]]
+; GCN-DAG: v_mac_f32_e32 [[VC]], [[VK]], [[VA]]
; GCN: s_endpgm
define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -59,7 +59,7 @@ define void @madmk_2_use_f32(float addrs
; GCN-LABEL: {{^}}madmk_inline_imm_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mad_f32 {{v[0-9]+}}, 4.0, [[VA]], [[VB]]
+; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -77,7 +77,7 @@ define void @madmk_inline_imm_f32(float
; GCN-LABEL: {{^}}s_s_madmk_f32:
; GCN-NOT: v_madmk_f32
-; GCN: v_mad_f32
+; GCN: v_mac_f32_e32
; GCN: s_endpgm
define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -107,7 +107,7 @@ define void @v_s_madmk_f32(float addrspa
; GCN-LABEL: {{^}}scalar_vector_madmk_f32:
; GCN-NOT: v_madmk_f32
-; GCN: v_mad_f32
+; GCN: v_mac_f32_e32
; GCN: s_endpgm
define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
Added: llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll?rev=242038&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/v_mac.ll Mon Jul 13 10:47:57 2015
@@ -0,0 +1,155 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}mac_vvv:
+; GCN: buffer_load_dword [[A:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0{{$}}
+; GCN: buffer_load_dword [[B:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:4
+; GCN: buffer_load_dword [[C:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:8
+; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]]
+; GCN: buffer_store_dword [[C]]
+define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+ %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+ %a = load float, float addrspace(1)* %in
+ %b = load float, float addrspace(1)* %b_ptr
+ %c = load float, float addrspace(1)* %c_ptr
+
+ %tmp0 = fmul float %a, %b
+ %tmp1 = fadd float %tmp0, %c
+ store float %tmp1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}mad_inline_sgpr_inline:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]}}, 0.5, s{{[0-9]+}}, 0.5
+define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) {
+entry:
+ %tmp0 = fmul float 0.5, %in
+ %tmp1 = fadd float %tmp0, 0.5
+ store float %tmp1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}mad_vvs:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
+define void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) {
+entry:
+ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+
+ %a = load float, float addrspace(1)* %in
+ %b = load float, float addrspace(1)* %b_ptr
+
+ %tmp0 = fmul float %a, %b
+ %tmp1 = fadd float %tmp0, %c
+ store float %tmp1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}mac_ssv:
+; GCN: v_mac_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+define void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) {
+entry:
+ %c = load float, float addrspace(1)* %in
+
+ %tmp0 = fmul float %a, %a
+ %tmp1 = fadd float %tmp0, %c
+ store float %tmp1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}mac_mad_same_add:
+; GCN: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
+; GCN: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
+define void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+ %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+ %d_ptr = getelementptr float, float addrspace(1)* %in, i32 3
+ %e_ptr = getelementptr float, float addrspace(1)* %in, i32 4
+
+ %a = load float, float addrspace(1)* %in
+ %b = load float, float addrspace(1)* %b_ptr
+ %c = load float, float addrspace(1)* %c_ptr
+ %d = load float, float addrspace(1)* %d_ptr
+ %e = load float, float addrspace(1)* %e_ptr
+
+ %tmp0 = fmul float %a, %b
+ %tmp1 = fadd float %tmp0, %c
+
+ %tmp2 = fmul float %d, %e
+ %tmp3 = fadd float %tmp2, %c
+
+ %out1 = getelementptr float, float addrspace(1)* %out, i32 1
+ store float %tmp1, float addrspace(1)* %out
+ store float %tmp3, float addrspace(1)* %out1
+ ret void
+}
+
+; There is no advantage to using v_mac when one of the operands is negated
+; and v_mad accepts more operand types.
+
+; GCN-LABEL: {{^}}mad_neg_src0:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+define void @mad_neg_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+ %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+ %a = load float, float addrspace(1)* %in
+ %b = load float, float addrspace(1)* %b_ptr
+ %c = load float, float addrspace(1)* %c_ptr
+
+ %neg_a = fsub float 0.0, %a
+ %tmp0 = fmul float %neg_a, %b
+ %tmp1 = fadd float %tmp0, %c
+
+ store float %tmp1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}mad_neg_src1:
+; GCN-NOT: v_mac_f32
+; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+define void @mad_neg_src1(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+ %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+ %a = load float, float addrspace(1)* %in
+ %b = load float, float addrspace(1)* %b_ptr
+ %c = load float, float addrspace(1)* %c_ptr
+
+ %neg_b = fsub float 0.0, %b
+ %tmp0 = fmul float %a, %neg_b
+ %tmp1 = fadd float %tmp0, %c
+
+ store float %tmp1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}mad_neg_src2:
+; GCN-NOT: v_mac
+; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
+define void @mad_neg_src2(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+ %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+
+ %a = load float, float addrspace(1)* %in
+ %b = load float, float addrspace(1)* %b_ptr
+ %c = load float, float addrspace(1)* %c_ptr
+
+ %neg_c = fsub float 0.0, %c
+ %tmp0 = fmul float %a, %b
+ %tmp1 = fadd float %tmp0, %neg_c
+
+ store float %tmp1, float addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { "true" "unsafe-fp-math"="true" }
More information about the llvm-commits
mailing list